diff --git a/cecli/coders/__init__.py b/cecli/coders/__init__.py
index 3fe9c0e3373..e5693180774 100644
--- a/cecli/coders/__init__.py
+++ b/cecli/coders/__init__.py
@@ -1,42 +1,57 @@
-from .agent_coder import AgentCoder
-from .architect_coder import ArchitectCoder
-from .ask_coder import AskCoder
-from .base_coder import Coder
-from .context_coder import ContextCoder
-from .copypaste_coder import CopyPasteCoder
-from .editblock_coder import EditBlockCoder
-from .editblock_fenced_coder import EditBlockFencedCoder
-from .editor_diff_fenced_coder import EditorDiffFencedCoder
-from .editor_editblock_coder import EditorEditBlockCoder
-from .editor_whole_coder import EditorWholeFileCoder
-from .hashline_coder import HashLineCoder
-from .help_coder import HelpCoder
-from .patch_coder import PatchCoder
-from .sub_agent_coder import SubAgentCoder
-from .udiff_coder import UnifiedDiffCoder
-from .udiff_simple import UnifiedDiffSimpleCoder
-from .wholefile_coder import WholeFileCoder
-
-# from .single_wholefile_func_coder import SingleWholeFileFunctionCoder
+"""Coder module with lazy imports to reduce startup memory."""
__all__ = [
- HelpCoder,
- AskCoder,
- Coder,
- EditBlockCoder,
- EditBlockFencedCoder,
- WholeFileCoder,
- PatchCoder,
- UnifiedDiffCoder,
- UnifiedDiffSimpleCoder,
- # SingleWholeFileFunctionCoder,
- ArchitectCoder,
- EditorEditBlockCoder,
- EditorWholeFileCoder,
- EditorDiffFencedCoder,
- ContextCoder,
- AgentCoder,
- CopyPasteCoder,
- HashLineCoder,
- SubAgentCoder,
+ "HelpCoder",
+ "AskCoder",
+ "Coder",
+ "EditBlockCoder",
+ "EditBlockFencedCoder",
+ "WholeFileCoder",
+ "PatchCoder",
+ "UnifiedDiffCoder",
+ "UnifiedDiffSimpleCoder",
+ "ArchitectCoder",
+ "EditorEditBlockCoder",
+ "EditorWholeFileCoder",
+ "EditorDiffFencedCoder",
+ "ContextCoder",
+ "AgentCoder",
+ "CopyPasteCoder",
+ "HashLineCoder",
+ "SubAgentCoder",
]
+
+# Maps each public coder class name to the relative module that defines it
+_MODULE_MAP = {
+ "HelpCoder": ".help_coder",
+ "AskCoder": ".ask_coder",
+ "Coder": ".base_coder",
+ "EditBlockCoder": ".editblock_coder",
+ "EditBlockFencedCoder": ".editblock_fenced_coder",
+ "WholeFileCoder": ".wholefile_coder",
+ "PatchCoder": ".patch_coder",
+ "UnifiedDiffCoder": ".udiff_coder",
+ "UnifiedDiffSimpleCoder": ".udiff_simple",
+ "ArchitectCoder": ".architect_coder",
+ "EditorEditBlockCoder": ".editor_editblock_coder",
+ "EditorWholeFileCoder": ".editor_whole_coder",
+ "EditorDiffFencedCoder": ".editor_diff_fenced_coder",
+ "ContextCoder": ".context_coder",
+ "AgentCoder": ".agent_coder",
+ "CopyPasteCoder": ".copypaste_coder",
+ "HashLineCoder": ".hashline_coder",
+ "SubAgentCoder": ".sub_agent_coder",
+}
+
+
+def __getattr__(name):
+ if name in _MODULE_MAP:
+ import importlib
+
+ mod = importlib.import_module(_MODULE_MAP[name], __package__)
+ return getattr(mod, name)
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+def __dir__():
+ return __all__
diff --git a/cecli/coders/agent_coder.py b/cecli/coders/agent_coder.py
index 2f558dcb8fe..94d61738442 100644
--- a/cecli/coders/agent_coder.py
+++ b/cecli/coders/agent_coder.py
@@ -194,6 +194,7 @@ def _get_agent_config(self):
"todo_list",
"sub_agents",
"skills",
+ "servers",
},
)
)
@@ -357,6 +358,7 @@ def _calculate_context_block_tokens(self, force=False):
"git_status",
"symbol_outline",
"skills",
+ "servers",
"sub_agents",
"loaded_skills",
]
@@ -391,6 +393,8 @@ def _generate_context_block(self, block_name):
content = self.get_todo_list()
elif block_name == "skills":
content = self.get_skills_context()
+ elif block_name == "servers":
+ content = self.get_servers_context()
elif block_name == "loaded_skills":
content = self.get_skills_content()
elif block_name == "sub_agents" and (
@@ -624,8 +628,8 @@ def get_context_summary(self):
result += f" ({percentage:.1f}% of limit)"
if percentage > 80:
result += "\n\n⚠ **Context is getting full!**\n"
- result += "- Remove non-essential files via the `ContextManager` tool.\n"
- result += "- Keep only essential files in context for best performance"
+ result += "- Remove non-essential files, skills and tool servers via the `ResourceManager` tool.\n"
+ result += "- Keep only essential files, skills, and MCP servers in context for best performance"
result += "\n"
if not hasattr(self, "context_blocks_cache"):
self.context_blocks_cache = {}
@@ -659,7 +663,9 @@ def get_environment_info(self):
result += f"- Git repository: {rel_repo_dir} with {num_files:,} files\n"
except Exception:
result += "- Git repository: active but details unavailable\n"
- else:
+ if self.mcp_manager and self.mcp_manager.connected_servers:
+ num_mcp_servers = len(self.mcp_manager.connected_servers)
+ result += f"- Connected MCP servers: {num_mcp_servers}\n"
result += "- Git repository: none\n"
result += ""
return result
@@ -806,9 +812,7 @@ async def gather_and_await():
if self.auto_lint and used_write_tool:
edited = list(self.files_edited_by_tools)
lint_coro = self.lint_edited(edited, show_output=False)
- lint_errors, interrupted = await self.coroutines.interruptible(
- lint_coro, self.interrupt_event
- )
+ lint_errors, interrupted = await interruptible(lint_coro, self.interrupt_event)
if interrupted:
raise KeyboardInterrupt("Interrupted during linting")
@@ -928,9 +932,7 @@ async def reply_completed(self):
)
self.io.tool_output(waiting_msg)
sleep_coro = asyncio.sleep(command_timeout / 2)
- _res, interrupted = await self.coroutines.interruptible(
- sleep_coro, self.interrupt_event
- )
+ _res, interrupted = await interruptible(sleep_coro, self.interrupt_event)
if interrupted:
raise KeyboardInterrupt("Interrupted while waiting for background commands")
return True
@@ -1329,7 +1331,7 @@ async def check_for_file_mentions(self, content):
Override parent's method to disable implicit file mention handling in agent mode.
Files should only be added via explicit tool commands
- (`ContextManager`).
+ (`ResourceManager`).
"""
pass
@@ -1483,6 +1485,81 @@ def get_skills_content(self):
self.io.tool_error(f"Error generating skills content context: {str(e)}")
return None
+ def get_servers_context(self):
+ """
+ Generate a context block for available MCP servers.
+
+ Categorizes servers as:
+ - Active: Connected and passing includelist/excludelist filters
+ - Inactive: Connected but filtered out by includelist/excludelist
+ - Available (Disconnected): Managed but not currently connected
+
+ Returns:
+ Formatted context block string or None if no servers available
+ """
+ if not self.use_enhanced_context:
+ return None
+ try:
+ if not self.mcp_manager:
+ return None
+
+ all_servers = self.mcp_manager.servers
+ connected_servers = self.mcp_manager.connected_servers
+ connected_server_names = {s.name for s in connected_servers}
+
+ if not all_servers:
+ return None
+
+ # Apply registered_servers filtering to determine active vs inactive
+ incl = self.registered_servers.get("included", set())
+ excl = self.registered_servers.get("excluded", set())
+
+ active_servers = []
+ inactive_servers = []
+ for server in connected_servers:
+ name = server.name
+ if incl and name not in incl:
+ inactive_servers.append(name)
+ elif name in excl:
+ inactive_servers.append(name)
+ else:
+ active_servers.append(name)
+
+ # Servers managed but not currently connected
+ disconnected_servers = [
+ server.name for server in all_servers if server.name not in connected_server_names
+ ]
+
+ result = '\n'
+ result += "## Connected MCP Servers\n\n"
+
+ if active_servers:
+ result += f"Active ({len(active_servers)}):\n"
+ for name in sorted(active_servers):
+ result += f"- {name}\n"
+ result += "\n"
+
+ if inactive_servers:
+ result += f"Inactive (Filtered) ({len(inactive_servers)}):\n"
+ for name in sorted(inactive_servers):
+ result += f"- {name}\n"
+ result += "\n"
+
+ if disconnected_servers:
+ result += f"Available (Disconnected) ({len(disconnected_servers)}):\n"
+ for name in sorted(disconnected_servers):
+ result += f"- {name}\n"
+ result += "\n"
+
+ if not active_servers and not inactive_servers and not disconnected_servers:
+ result += "No MCP servers currently available.\n\n"
+
+ result += ""
+ return result
+ except Exception as e:
+ self.io.tool_error(f"Error generating servers context: {str(e)}")
+ return None
+
def get_sub_agents_context(self):
"""
Generate a context block for registered sub-agents.
diff --git a/cecli/coders/architect_coder.py b/cecli/coders/architect_coder.py
index 9eb6e53d68e..99d1ddff92e 100644
--- a/cecli/coders/architect_coder.py
+++ b/cecli/coders/architect_coder.py
@@ -45,7 +45,6 @@ async def reply_completed(self):
kwargs["args"] = self.args
kwargs["suggest_shell_commands"] = False
kwargs["map_tokens"] = 0
- kwargs["total_cost"] = self.total_cost
kwargs["cache_prompts"] = False
kwargs["num_cache_warming_pings"] = 0
kwargs["summarize_from_coder"] = False
diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py
index 1738092fb6e..ceeb4862a3d 100755
--- a/cecli/coders/base_coder.py
+++ b/cecli/coders/base_coder.py
@@ -31,12 +31,6 @@
from urllib.parse import urlparse
from uuid import uuid4 as generate_unique_id
-import httpx
-from litellm import experimental_mcp_client
-from litellm.types.utils import ModelResponse
-from prompt_toolkit.patch_stdout import patch_stdout
-from rich.console import Console
-
import cecli.prompts.utils.system as prompts
from cecli import __version__, models, urls, utils
from cecli.commands import Commands, SwitchCoderSignal
@@ -111,6 +105,28 @@ def wrap_fence(name):
wrap_fence("sourcecode"),
]
+# Map of edit_format values to coder class names.
+# Used by Coder.create() to find the right coder class by edit_format
+# without importing all coder modules.
+EDIT_FORMAT_MAP = {
+ "help": "HelpCoder",
+ "ask": "AskCoder",
+ "diff": "EditBlockCoder",
+ "diff-fenced": "EditBlockFencedCoder",
+ "whole": "WholeFileCoder",
+ "patch": "PatchCoder",
+ "udiff": "UnifiedDiffCoder",
+ "udiff-simple": "UnifiedDiffSimpleCoder",
+ "architect": "ArchitectCoder",
+ "editor-diff": "EditorEditBlockCoder",
+ "editor-whole": "EditorWholeFileCoder",
+ "editor-diff-fenced": "EditorDiffFencedCoder",
+ "context": "ContextCoder",
+ "agent": "AgentCoder",
+ "hashline": "HashLineCoder",
+ "subagent": "SubAgentCoder",
+}
+
class UsageMeta(type):
"""Metaclass that provides shared accumulator properties across all Coder subclasses.
@@ -335,9 +351,10 @@ async def create(
res = coders.CopyPasteCoder(main_model, io, args=args, **kwargs)
if not res:
- for coder in coders.__all__:
- if hasattr(coder, "edit_format") and coder.edit_format == edit_format:
- res = coder(main_model, io, args=args, **kwargs)
+ coder_name = EDIT_FORMAT_MAP.get(edit_format)
+ if coder_name:
+ coder_cls = getattr(coders, coder_name)
+ res = coder_cls(main_model, io, args=args, **kwargs)
if res is not None:
if from_coder:
@@ -359,11 +376,7 @@ async def create(
res.original_kwargs = dict(kwargs)
return res
- valid_formats = [
- str(c.edit_format)
- for c in coders.__all__
- if hasattr(c, "edit_format") and c.edit_format is not None
- ]
+ valid_formats = list(EDIT_FORMAT_MAP.keys())
raise UnknownEditFormat(edit_format, valid_formats)
async def clone(self, **kwargs):
@@ -1470,6 +1483,8 @@ async def run(self, with_message=None, preproc=True):
return await self._run_linear(with_message, preproc)
if self.io.prompt_session:
+ from prompt_toolkit.patch_stdout import patch_stdout
+
with patch_stdout(raw=True):
return await self._run_parallel(with_message, preproc)
else:
@@ -1954,7 +1969,10 @@ async def check_for_urls(self, inp: str) -> List[str]:
def keyboard_interrupt(self):
# Ensure cursor is visible on exit
- Console().show_cursor(True)
+ if not self.tui:
+ from rich.console import Console
+
+ Console().show_cursor(True)
self.io.tool_warning("^C KeyboardInterrupt")
self.interrupt_event.set()
@@ -1973,13 +1991,25 @@ async def compact_context_if_needed(self, force=False, message=""):
done_messages = manager.get_messages_dict(MessageTag.DONE)
cur_messages = manager.get_messages_dict(MessageTag.CUR)
diff_messages = manager.get_messages_dict(MessageTag.DIFFS)
+ all_messages = manager.get_messages_dict()
# Exclude first cur_message since that's the user's initial input
done_tokens = self.summarizer.count_tokens(done_messages)
cur_tokens = self.summarizer.count_tokens(cur_messages[1:] if len(cur_messages) > 1 else [])
diff_tokens = self.summarizer.count_tokens(diff_messages)
+ all_tokens = self.summarizer.count_tokens(all_messages)
+
combined_tokens = done_tokens + cur_tokens + diff_tokens
+ if force or (
+ all_tokens >= self.context_compaction_max_tokens * 0.9
+ and ConversationService.get_chunks(self).last_clear_count > 10
+ ):
+ manager.clear_tag(MessageTag.DIFFS)
+ manager.clear_tag(MessageTag.FILE_CONTEXTS)
+ ConversationService.get_files(self).clear_file_cache()
+ ConversationService.get_chunks(self).flush_removals()
+
if not force and combined_tokens < self.context_compaction_max_tokens:
return
@@ -2881,6 +2911,8 @@ async def _execute_local_tools(self, tool_calls):
async def _execute_mcp_tools(self, server, tool_calls):
"""Execute MCP tools via LiteLLM."""
+ import httpx
+
tool_responses = []
try:
# Connect to the server once
@@ -2928,6 +2960,8 @@ async def _execute_mcp_tools(self, server, tool_calls):
continue
async def do_tool_call():
+ from litellm import experimental_mcp_client
+
return await experimental_mcp_client.call_openai_tool(
session=session,
openai_tool=new_tool_call,
@@ -3366,6 +3400,8 @@ async def check_for_file_mentions(self, content):
return prompts.added_files.format(fnames=", ".join(added_fnames))
async def send(self, messages, model=None, functions=None, tools=None):
+ from litellm.types.utils import ModelResponse
+
self.interrupt_event.clear()
self.got_reasoning_content = False
self.ended_reasoning_content = False
@@ -3452,6 +3488,8 @@ async def send(self, messages, model=None, functions=None, tools=None):
self.io.ai_output(json.dumps(args, indent=4))
async def show_send_output(self, completion):
+ from litellm.types.utils import ModelResponse
+
if self.verbose:
print(completion)
diff --git a/cecli/coders/copypaste_coder.py b/cecli/coders/copypaste_coder.py
index 65dd32d5a1f..ec92f522e4b 100644
--- a/cecli/coders/copypaste_coder.py
+++ b/cecli/coders/copypaste_coder.py
@@ -78,14 +78,13 @@ def _init_prompts_from_selected_edit_format(self):
coders = None
target_coder_class = None
- if coders is not None:
- for coder_cls in getattr(coders, "__all__", []):
- if (
- hasattr(coder_cls, "edit_format")
- and coder_cls.edit_format == selected_edit_format
- ):
- target_coder_class = coder_cls
- break
+ if coders is not None and selected_edit_format:
+ # Use EDIT_FORMAT_MAP from base_coder to find the right coder class by edit_format
+ from cecli.coders.base_coder import EDIT_FORMAT_MAP
+
+ coder_name = EDIT_FORMAT_MAP.get(selected_edit_format)
+ if coder_name:
+ target_coder_class = getattr(coders, coder_name, None)
# Mirror prompt pack + edit_format where available.
if target_coder_class is not None:
diff --git a/cecli/commands/__init__.py b/cecli/commands/__init__.py
index ae4e83a84cd..8095f367e9e 100644
--- a/cecli/commands/__init__.py
+++ b/cecli/commands/__init__.py
@@ -35,6 +35,7 @@
from .hot_reload import HotReloadCommand
from .include_skill import IncludeSkillCommand
from .lint import LintCommand
+from .list_mcp import ListMcpCommand
from .list_sessions import ListSessionsCommand
from .list_skills import ListSkillsCommand
from .load import LoadCommand
@@ -123,6 +124,7 @@
CommandRegistry.register(SwitchAgentCommand)
CommandRegistry.register(IncludeSkillCommand)
CommandRegistry.register(LintCommand)
+CommandRegistry.register(ListMcpCommand)
CommandRegistry.register(ListSessionsCommand)
CommandRegistry.register(ListSkillsCommand)
CommandRegistry.register(LoadCommand)
@@ -210,6 +212,7 @@
"LoadCommand",
"LoadHookCommand",
"LoadMcpCommand",
+ "ListMcpCommand",
"LoadSessionCommand",
"LoadSkillCommand",
"LsCommand",
diff --git a/cecli/commands/agent_model.py b/cecli/commands/agent_model.py
index ceaa02ebf5f..758bb0da41e 100644
--- a/cecli/commands/agent_model.py
+++ b/cecli/commands/agent_model.py
@@ -47,7 +47,6 @@ async def execute(cls, io, coder, args, **kwargs):
kwargs["main_model"] = model
kwargs["edit_format"] = coder.edit_format # Keep the same edit format
kwargs["suggest_shell_commands"] = False
- kwargs["total_cost"] = coder.total_cost
kwargs["num_cache_warming_pings"] = 0
kwargs["summarize_from_coder"] = False
kwargs["done_messages"] = []
diff --git a/cecli/commands/editor_model.py b/cecli/commands/editor_model.py
index b76190a56d5..28725a20466 100644
--- a/cecli/commands/editor_model.py
+++ b/cecli/commands/editor_model.py
@@ -46,7 +46,6 @@ async def execute(cls, io, coder, args, **kwargs):
kwargs["main_model"] = model
kwargs["edit_format"] = coder.edit_format # Keep the same edit format
kwargs["suggest_shell_commands"] = False
- kwargs["total_cost"] = coder.total_cost
kwargs["num_cache_warming_pings"] = 0
kwargs["summarize_from_coder"] = False
kwargs["done_messages"] = []
diff --git a/cecli/commands/list_mcp.py b/cecli/commands/list_mcp.py
new file mode 100644
index 00000000000..0cf14c56446
--- /dev/null
+++ b/cecli/commands/list_mcp.py
@@ -0,0 +1,71 @@
+from cecli.commands.utils.base_command import BaseCommand
+from cecli.commands.utils.helpers import format_command_result
+
+
+class ListMcpCommand(BaseCommand):
+ NORM_NAME = "list-mcp"
+ DESCRIPTION = "List all loaded and configured MCP servers."
+
+ @classmethod
+ async def execute(cls, io, coder, args, **kwargs):
+ """Execute the list-mcp command."""
+ if not coder.mcp_manager:
+ return format_command_result(io, cls.NORM_NAME, "MCP manager is not configured.")
+
+ all_servers = coder.mcp_manager.servers
+ connected_servers = coder.mcp_manager.connected_servers
+
+ connected_server_names = {s.name for s in connected_servers}
+
+ # Apply per-coder registered_servers filtering to determine active status
+ incl = coder.registered_servers["included"]
+ excl = coder.registered_servers["excluded"]
+
+ active_servers = []
+ inactive_servers = []
+
+ for server in connected_servers:
+ name = server.name
+ # Same filtering logic used in base_coder.get_tool_list()
+ if incl and name not in incl:
+ inactive_servers.append(name)
+ elif name in excl:
+ inactive_servers.append(name)
+ else:
+ active_servers.append(name)
+
+ configured_servers = [
+ server for server in all_servers if server.name not in connected_server_names
+ ]
+
+ result = []
+ if active_servers:
+ result.append("Active MCP Servers:")
+ for name in sorted(active_servers):
+ result.append(f"- {name}")
+ else:
+ result.append("No MCP servers are active for this coder.")
+
+ if inactive_servers:
+ result.append("")
+ result.append("Inactive (Filtered) MCP Servers:")
+ for name in sorted(inactive_servers):
+ result.append(f"- {name}")
+
+ result.append("")
+ if configured_servers:
+ result.append("Configured MCP Servers:")
+ for server in sorted(configured_servers, key=lambda s: s.name):
+ result.append(f"- {server.name}")
+ else:
+ result.append("No other MCP servers are configured.")
+
+ return format_command_result(io, cls.NORM_NAME, "\n".join(result))
+
+ @classmethod
+ def get_help(cls) -> str:
+ """Get help text for the list-mcp command."""
+ help_text = super().get_help()
+ help_text += "\nUsage:\n"
+ help_text += " /list-mcp # Lists MCP servers with coder-sensitive active/inactive/configured status\n"
+ return help_text
diff --git a/cecli/commands/model.py b/cecli/commands/model.py
index 83bc18e6e5f..f09c9fc64c2 100644
--- a/cecli/commands/model.py
+++ b/cecli/commands/model.py
@@ -50,7 +50,6 @@ async def execute(cls, io, coder, args, **kwargs):
kwargs["main_model"] = model
kwargs["edit_format"] = new_edit_format
kwargs["suggest_shell_commands"] = False
- kwargs["total_cost"] = coder.total_cost
kwargs["num_cache_warming_pings"] = 0
kwargs["summarize_from_coder"] = False
kwargs["done_messages"] = []
diff --git a/cecli/commands/remove_mcp.py b/cecli/commands/remove_mcp.py
index 6a08ef33a5f..2b32307ecd1 100644
--- a/cecli/commands/remove_mcp.py
+++ b/cecli/commands/remove_mcp.py
@@ -10,7 +10,7 @@
class RemoveMcpCommand(BaseCommand):
NORM_NAME = "remove-mcp"
- DESCRIPTION = "Remove a MCP server by name, or use '*' to remove all"
+ DESCRIPTION = "Remove (unload) a MCP server by name, or use '*' to remove all"
@classmethod
async def execute(cls, io, coder, args, **kwargs):
diff --git a/cecli/commands/weak_model.py b/cecli/commands/weak_model.py
index acff8a48e30..12d79f6cffd 100644
--- a/cecli/commands/weak_model.py
+++ b/cecli/commands/weak_model.py
@@ -46,7 +46,6 @@ async def execute(cls, io, coder, args, **kwargs):
kwargs["main_model"] = model
kwargs["edit_format"] = coder.edit_format # Keep the same edit format
kwargs["suggest_shell_commands"] = False
- kwargs["total_cost"] = coder.total_cost
kwargs["num_cache_warming_pings"] = 0
kwargs["summarize_from_coder"] = False
kwargs["done_messages"] = []
diff --git a/cecli/helpers/conversation/integration.py b/cecli/helpers/conversation/integration.py
index a245f50df64..a9381d4ecbb 100644
--- a/cecli/helpers/conversation/integration.py
+++ b/cecli/helpers/conversation/integration.py
@@ -18,7 +18,7 @@ class ConversationChunks:
def __init__(self, coder):
self.coder = weakref.ref(coder)
self.uuid = coder.uuid
- self._last_clear_count = 0
+ self.last_clear_count = 0
self._deferred_removals = set()
@classmethod
@@ -282,14 +282,14 @@ def cleanup_files(self) -> None:
if diff_count > 0 and other_count > 0 and diff_count / other_count > 20:
should_clear = True
- self._last_clear_count += 1
+ self.last_clear_count += 1
if (
should_clear
- and self._last_clear_count >= 20
+ and self.last_clear_count >= 20
and diff_tokens + other_tokens > coder.context_compaction_max_tokens * 0.5
):
- self._last_clear_count = 0
+ self.last_clear_count = 0
# Clear all diff messages
ConversationService.get_manager(coder).clear_tag(MessageTag.DIFFS)
@@ -890,7 +890,7 @@ def add_static_context_blocks(self) -> None:
"""
Add static context blocks to conversation (priority 50).
- Static blocks include: environment_info, directory_structure, skills
+ Static blocks include: environment_info, directory_structure, skills, servers, sub_agents
"""
coder = self.get_coder()
if not coder:
@@ -922,6 +922,10 @@ def add_static_context_blocks(self) -> None:
block = coder._generate_context_block("skills")
if block:
message_blocks["skills"] = block
+ if "servers" in coder.allowed_context_blocks:
+ block = coder._generate_context_block("servers")
+ if block:
+ message_blocks["servers"] = block
# Add static blocks to conversation manager with stable hash keys
for block_type, block_content in message_blocks.items():
diff --git a/cecli/helpers/file_system/service.py b/cecli/helpers/file_system/service.py
index 42ed8ff83ca..478f683ad47 100644
--- a/cecli/helpers/file_system/service.py
+++ b/cecli/helpers/file_system/service.py
@@ -71,16 +71,19 @@ def reset_instance(cls) -> None:
cls._instance = None
@classmethod
- def get_instance(cls, root: str = ".", repo=None) -> "FileSystemService":
+ def get_instance(cls, root: str | None = None, repo=None) -> "FileSystemService":
"""
Return the global singleton.
On first call, creates and builds the instance using root/repo.
- Subsequent calls return the existing instance (root/repo
- parameters are ignored). This ensures all agents share one
- file index regardless of when they're spawned.
+ Subsequent calls return the existing instance. If a non-None root
+ is explicitly provided and differs from the current root, the
+ singleton is rebuilt (e.g. when a new coder is created in a
+ different working directory).
"""
if cls._instance is None:
+ cls._instance = cls._create(root=root or ".", repo=repo)
+ elif root is not None and root != cls._instance.root:
cls._instance = cls._create(root=root, repo=repo)
return cls._instance
diff --git a/cecli/helpers/hashline.py b/cecli/helpers/hashline.py
index 4d985578b20..c6904ef3d23 100644
--- a/cecli/helpers/hashline.py
+++ b/cecli/helpers/hashline.py
@@ -1,8 +1,6 @@
import difflib
import re
-from cecli.helpers.grep_ast.parsers import filename_to_lang
-from cecli.helpers.grep_ast.tsl import get_language, get_parser
from cecli.helpers.hashpos.hashpos import HashPos
HASHLINE_PREFIX_RE = HashPos.HASH_PREFIX_RE
@@ -41,7 +39,7 @@ def strip_hashline(text: str) -> str:
def normalize_hashline(hashline_str: str) -> str:
"""
- Normalize a hashline string to the 4-character hash fragment.
+ Normalize a hashline string to the content id hash fragment.
"""
if hashline_str in ("@000", "000@"):
return hashline_str
@@ -430,8 +428,8 @@ def get_hashline_diff(
original_lines = original_content.splitlines()
text_lines = text.splitlines()
# Get up to 3 lines of context before (ending at found_end) and after the insertion point
- ctx_before = original_lines[max(0, found_end - 2) : found_end + 1]
- ctx_after = original_lines[found_end + 1 : min(len(original_lines), found_end + 4)]
+ ctx_before = original_lines[max(0, found_end - 6) : found_end]
+ ctx_after = original_lines[found_end + 1 : min(len(original_lines), found_end + 7)]
# Build a mini document with context so HashPos computes correct neighborhood hashes
mini_lines = ctx_before + text_lines + ctx_after
mini_text = "\n".join(mini_lines)
@@ -453,8 +451,8 @@ def get_hashline_diff(
original_lines = original_content.splitlines()
text_lines = text.splitlines()
# Get up to 3 lines of context before and after the range
- ctx_before = original_lines[max(0, found_start - 3) : found_start]
- ctx_after = original_lines[found_end + 1 : min(len(original_lines), found_end + 4)]
+ ctx_before = original_lines[max(0, found_start - 6) : found_start]
+ ctx_after = original_lines[found_end + 1 : min(len(original_lines), found_end + 7)]
# Build a mini document with context so HashPos computes correct neighborhood hashes
mini_lines = ctx_before + text_lines + ctx_after
mini_text = "\n".join(mini_lines)
@@ -479,8 +477,8 @@ def get_hashline_diff(
# Strip line endings for difflib comparison but keep them in the actual lines
diff = difflib.unified_diff(
- [line.rstrip("\r\n") for line in find_lines],
- [line.rstrip("\r\n") for line in replace_lines],
+ [strip_hashline(line.rstrip("\r\n")) for line in find_lines],
+ [strip_hashline(line.rstrip("\r\n")) for line in replace_lines],
lineterm="",
n=1,
)
@@ -1450,6 +1448,9 @@ def get_indentation(line: str) -> int:
return resolved_ops
# Determine language from file path
+ from cecli.helpers.grep_ast.parsers import filename_to_lang
+ from cecli.helpers.grep_ast.tsl import get_language, get_parser
+
lang = filename_to_lang(file_path)
if not lang:
return resolved_ops
@@ -1463,7 +1464,6 @@ def get_indentation(line: str) -> int:
return resolved_ops
source_lines = original_content.splitlines()
- MAX_STEPS = 3 # Maximum expansion steps
for resolved in resolved_ops:
op = resolved["op"]
@@ -1489,47 +1489,35 @@ def get_indentation(line: str) -> int:
llm_end = min(llm_end, len(source_lines) - 1)
# --- THE HEALING LOOP ---
- # Try original bounds first (distance 0), then progressively expand
- # outward in rounds. At each round d>=1, test exactly 4 candidates:
- # 1. Both indices down by d lines (range shifts down)
- # 2. Both indices up by d lines (range shifts up)
- # 3. Start index down by d lines, end unchanged (partial expansion)
- # 4. End index down by d lines, start unchanged (partial expansion)
- #
- # If multiple candidates are valid at a round, select using:
- # 1. Longest resulting source code (preserve more code)
- # 2. Partial expansions over full range shifts
- # 3. Downward changes over upward changes
-
- found_valid = False
- EFFECTIVE_STEPS = MAX_STEPS
-
- if llm_start == llm_end:
- EFFECTIVE_STEPS = 1
-
- for distance in range(EFFECTIVE_STEPS + 1):
- if distance == 0:
- round_candidates = [(0, 0)]
- else:
- round_candidates = [
- (-distance, 0), # Start down only (partial)
- (+distance, 0), # Start up only (partial)
- (0, +distance), # End down only (partial)
- (0, -distance), # End up only (partial)
- (-distance, +distance), # Both indices down
- (+distance, -distance), # Both indices up
- (+distance, +distance), # Expand outward (start up, end down)
- (-distance, -distance), # Contract inward (start down, end up)
- ]
-
- valid_at_round = []
- for start_shift, end_shift in round_candidates:
+ # Generate all combinations of offsets in {-2, -1, 0, 1, 2} for both
+ # start and end, then filter to syntactically valid edits and rank them,
+ # preferring unchanged boundary-line text and then low cumulative movement
+ # (abs(start_shift) + abs(end_shift)) to bias toward minimal perturbation.
+
+ offsets = [0, 1, -1, 2, -2]
+ all_candidates = []
+ for start_shift in offsets:
+ for end_shift in offsets:
candidate_start = max(0, llm_start - start_shift)
candidate_end = min(len(source_lines) - 1, llm_end + end_shift)
if candidate_end < candidate_start:
continue
+ # Skip candidates that overlap with other operations' ranges
+ overlaps = False
+ for other_op in resolved_ops:
+ if other_op is resolved:
+ continue
+ other_start = other_op["start_idx"]
+ other_end = other_op["end_idx"]
+ if candidate_start <= other_end and candidate_end >= other_start:
+ overlaps = True
+ break
+
+ if overlaps:
+ continue
+
# Skip candidates that would create duplicate adjacent content at edit boundaries
if _would_create_duplicate_content(
source_lines, candidate_start, candidate_end, repl_lines
@@ -1549,6 +1537,9 @@ def get_indentation(line: str) -> int:
is_downward = start_shift <= 0 # Negative/zero shift = moving down
is_both = start_shift == end_shift # Whole range expansion/contraction
closures = count_closures(test_source)
+ start_line_match = source_lines[candidate_start] == source_lines[llm_start]
+ end_line_match = source_lines[candidate_end] == source_lines[llm_end]
+ cumulative_movement = abs(start_shift) + abs(end_shift)
# --- INDENTATION SCORING ---
indent_score = 0
@@ -1575,45 +1566,48 @@ def get_indentation(line: str) -> int:
) == get_indentation(source_end_line):
indent_score += 1
- valid_at_round.append(
+ all_candidates.append(
{
"start_idx": candidate_start,
"end_idx": candidate_end,
+ "start_line_match": start_line_match,
+ "end_line_match": end_line_match,
"source_len": len(test_source),
"is_partial": is_partial,
"is_downward": is_downward,
"is_both": is_both,
"closure_count": closures,
"indent_score": indent_score,
+ "cumulative_movement": cumulative_movement,
+ "offsets": (start_shift, end_shift),
}
)
- if valid_at_round:
- # Sort using the new hierarchy:
- # 1. Fewest total closures (minimize structural pollution)
- # 2. Indentation Score (Descending: 2 is better than 0)
- # 3. Longest source (preserve more file content)
- # 4. Partial expansions over full range shifts
- # 5. Downward changes over upward changes
- valid_at_round.sort(
- key=lambda r: (
- -r["indent_score"], # Descending: larger score is better (2 > 1 > 0)
- -r["source_len"], # Descending: larger is better
- r["closure_count"], # Ascending: smaller is better
- not r["is_partial"], # Booleans: False comes before True
- not r["is_downward"], # Booleans: False comes before True
- r["is_both"], # Booleans: False comes before True
- )
+ if all_candidates:
+ # Sort candidates by, in priority order:
+ # 1. Start line, then end line, whose text equals the original boundary line
+ # 2. Cumulative movement (ascending — less movement is better)
+ # 3. Indentation score (descending: 2 is better than 0)
+ # 4. Longest source (preserve more file content), then fewest total closures
+ # 5. Partial expansions over full range shifts
+ # 6. Downward changes over upward changes, then unequal start/end shifts
+ all_candidates.sort(
+ key=lambda r: (
+ not r["start_line_match"], # Descending: True first
+ not r["end_line_match"], # Descending: True first
+ r["cumulative_movement"], # Ascending: smaller is better
+ -r["indent_score"], # Descending: larger score is better (2 > 1 > 0)
+ -r["source_len"], # Descending: larger is better
+ r["closure_count"], # Ascending: smaller is better
+ not r["is_partial"], # Booleans: False comes before True
+ not r["is_downward"], # Booleans: False comes before True
+ r["is_both"], # Booleans: False comes before True
)
+ )
- best = valid_at_round[0]
- resolved["start_idx"] = best["start_idx"]
- resolved["end_idx"] = best["end_idx"]
- found_valid = True
- break
-
- if not found_valid:
- pass
+ best = all_candidates[0]
+ resolved["start_idx"] = best["start_idx"]
+ resolved["end_idx"] = best["end_idx"]
return resolved_ops
@@ -1758,6 +1752,12 @@ def apply_hashline_operations(
resolved_ops = _merged_contained_ranges(resolved_ops)
# Merge contiguous replace operations
resolved_ops = _merge_replace_operations(resolved_ops)
+
+ # Sort by start_idx descending to apply from bottom to top
+ # When operations have same start_idx, apply in order: insert, replace, delete
+ # This ensures correct behavior when multiple operations target the same line
+ resolved_ops.sort(key=sort_ranges)
+
if file_path:
# Apply tree-sitter based closure safeguard to snap boundaries to AST nodes
resolved_ops = _apply_closure_safeguard(original_content, resolved_ops, file_path)
@@ -1766,11 +1766,6 @@ def apply_hashline_operations(
source_lines = original_content.splitlines()
resolved_ops = _fix_duplicate_content_boundaries(source_lines, resolved_ops)
- # Sort by start_idx descending to apply from bottom to top
- # When operations have same start_idx, apply in order: insert, replace, delete
- # This ensures correct behavior when multiple operations target the same line
- resolved_ops.sort(key=sort_ranges)
-
successful_ops = []
# Loop to apply operations in sorted order (bottom-to-top)
for resolved in resolved_ops:
diff --git a/cecli/helpers/hashpos/hashpos.py b/cecli/helpers/hashpos/hashpos.py
index 516052012c9..280e3394ec4 100644
--- a/cecli/helpers/hashpos/hashpos.py
+++ b/cecli/helpers/hashpos/hashpos.py
@@ -4,37 +4,67 @@
class HashPos:
- B64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789~_"
- # Regex pattern for HashPos format: {4-char-hash}::
- HASH_PREFIX_RE = re.compile(r"^([0-9a-zA-Z\~_@]{4})::")
- # Regex for normalization: 4 hash chars optionally followed by '::'
- NORMALIZE_RE = re.compile(r"^([0-9a-zA-Z\~_@]{4})(?:)?::")
- # Regex for a raw 4-character fragment
- FRAGMENT_RE = re.compile(r"^[0-9a-zA-Z\~_@]{4}$")
+ B256 = (
+ "ABCDEFGHIJKLMNOP"
+ "QRSTUVWXYZabcdef"
+ "ghijklmnopqrstuv"
+ "wxyz0123456789~_"
+ "áéíóúñüöäßåøæçèà"
+ "ùîôûбгджзийлпфцч"
+ "шщъыьэюя的是不了人我在有"
+ "他这为之大来以个中上们到说国和学"
+ "あいうえおかきくけこさしすせそた"
+ "ちつてとアイウエオカキクケコサシ"
+ "スセソタチツテトαβγδεζηθ"
+ "ικλμνξπ要会出就道也时年得"
+ "生自下而过能可对行没发用天作方成"
+ "者多日都三小机把理实心看起样好当"
+ "点本民事其然想经去种动全意面前所"
+ "业定现将法新明问度但最美月手走信"
+ )
+
+ # We use a regex-safe character class string for compiling patterns
+ _B256_REGEX_SET = (
+ "A-Za-z0-9~_"
+ "áéíóúñüöäßåøæçèà"
+ "ùîôûбгджзийлпфцч"
+ "шщъыьэюя的是不了人我在有"
+ "他这为之大来以个中上们到说国和学"
+ "あいうえおかきくけこさしすせそた"
+ "ちつてとアイウエオカキクケコサシ"
+ "スセソタチツテトαβγδεζηθ"
+ "ικλμνξπ要会出就道也时年得"
+ "生自下而过能可对行没发用天作方成"
+ "者多日都三小机把理实心看起样好当"
+ "点本民事其然想经去种动全意面前所"
+ "业定现将法新明问度但最美月手走信"
+ )
+
+ # Regex pattern for HashPos format: {3-char-hash}::
+ HASH_PREFIX_RE = re.compile(rf"^([{_B256_REGEX_SET}]{{3}})::")
+ # Regex for normalization: 3 hash chars followed by a mandatory '::' (trailing content allowed)
+ NORMALIZE_RE = re.compile(rf"^([{_B256_REGEX_SET}]{{3}})(?:)?::")
+ # Regex for a raw 3-character fragment
+ FRAGMENT_RE = re.compile(rf"^[{_B256_REGEX_SET}]{{3}}$")
def __init__(self, source_text: str = ""):
self.lines = source_text.splitlines()
self.total = len(self.lines)
- def _get_region_bits(self, line_idx: int) -> tuple[int, int]:
+ def _get_region_val(self, line_idx: int) -> int:
"""
- Uses line_idx modulo 16 (4 bits) to get two 2-bit flags (b1, b2).
- This guarantees up to 16 consecutive repeating lines get unique spatial anchors.
+ Uses line_idx modulo 16 (4 bits).
+ Guarantees up to 16 consecutive repeating lines get unique spatial anchors.
"""
- mod_val = line_idx % 16
-
- # Split the 4-bit modulo value into two separate 2-bit flags
- b1 = (mod_val >> 2) & 3 # Top 2 bits (mask with 0b11)
- b2 = mod_val & 3 # Bottom 2 bits
- return b1, b2
+ return line_idx % 16
def _get_neighborhood_hash(self, line_idx: int) -> int:
"""
- Creates a 20-bit digest using the current line and the 3 lines
+ Creates a 20-bit digest using the current line and the 2 lines
before and after it.
"""
- start = max(0, line_idx - 3)
- end = min(self.total, line_idx + 4)
+ start = max(0, line_idx - 2)
+ end = min(self.total, line_idx + 3)
context_window = "\n".join(self.lines[start:end])
full_hash = xxhash.xxh3_64_intdigest(context_window.encode("utf-8"))
@@ -51,22 +81,20 @@ def generate_private_id(self, text: str) -> str:
def generate_public_id(self, text: str, line_idx: int) -> str:
"""
- Generates a 4-char Base64 ID combining modulo buckets and context hash.
- Layout: [2-bit b1] [2-bit b2] [10-bit Hash A] [10-bit Hash B]
+ Generates a 3-char Base256 ID combining a 4-bit modulo bucket and a 20-bit context hash.
+ Layout: [4-bit Region] [20-bit Neighborhood Hash] = 24 bits total.
+ Each Base256 char holds 8 bits (3 chars * 8 = 24 bits).
"""
- b1, b2 = self._get_region_bits(line_idx)
+ region_val = self._get_region_val(line_idx)
neighborhood_hash = self._get_neighborhood_hash(line_idx)
- # Split the 20-bit hash into two 10-bit halves
- hash_a = (neighborhood_hash >> 10) & 0x3FF
- hash_b = neighborhood_hash & 0x3FF
+ # Pack the 24-bit integer
+ packed = (region_val << 20) | neighborhood_hash
- # Construct the mixed 24-bit integer
- packed = (b1 << 22) | (b2 << 20) | (hash_a << 10) | hash_b
res = ""
- for _ in range(4):
- res += self.B64[packed % 64]
- packed //= 64
+ for _ in range(3):
+ res += self.B256[packed % 256]
+ packed //= 256
return res
def unpack_public_id(self, public_id: str) -> tuple[int, int]:
@@ -75,16 +103,13 @@ def unpack_public_id(self, public_id: str) -> tuple[int, int]:
"""
packed = 0
for i, char in enumerate(public_id):
- packed |= self.B64.index(char) << (6 * i)
+ packed |= self.B256.index(char) << (8 * i)
- b1 = (packed >> 22) & 3
- b2 = (packed >> 20) & 3
- hash_a = (packed >> 10) & 0x3FF
- hash_b = packed & 0x3FF
- mod_val = (b1 << 2) | b2
- neighborhood_hash = (hash_a << 10) | hash_b
+ # Extract the 4-bit region (mask 0xF) and 20-bit hash (mask 0xFFFFF)
+ region_val = (packed >> 20) & 0xF
+ neighborhood_hash = packed & 0xFFFFF
- return mod_val, neighborhood_hash
+ return region_val, neighborhood_hash
def format_content(self, use_private_ids: bool = False, start_line: int = 1) -> str:
formatted_lines = []
@@ -128,11 +153,6 @@ def modulo_distance(idx: int) -> int:
def resolve_range(self, start_id: str, end_id: str) -> tuple[int, int]:
"""
Resolves a block range from two Public IDs.
-
- Logic:
- 1. Resolve all candidates for both IDs (sorted by best match).
- 2. Find the pair of (start, end) that are logically ordered.
- 3. Returns (start_index, end_index)
"""
starts = self.resolve_to_lines(start_id)
ends = self.resolve_to_lines(end_id)
@@ -146,28 +166,17 @@ def resolve_range(self, start_id: str, end_id: str) -> tuple[int, int]:
return s, e
raise ValueError(
- f"Found matches for {start_id} and {end_id}, but no logically ordered range or unique"
- " matches."
+ f"Found matches for {start_id} and {end_id}, but no logically ordered range or unique matches."
)
@staticmethod
def strip_prefix(text: str) -> str:
- r"""
+ """
Remove HashPos prefixes from the start of every line.
-
- Removes prefixes that match the pattern: "{4-char-hash}"
- where the hash is exactly 4 characters from the set [0-9a-zA-Z\~_@] followed by '::'.
-
- Args:
- text: Input text with HashPos prefixes
-
- Returns:
- String with HashPos prefixes removed from each line
"""
lines = text.splitlines(keepends=True)
result_lines = []
for line in lines:
- # Remove the HashPos prefix if present
stripped_line = HashPos.HASH_PREFIX_RE.sub("", line, count=1)
result_lines.append(stripped_line)
@@ -177,12 +186,6 @@ def strip_prefix(text: str) -> str:
def extract_prefix(line: str) -> str:
"""
Extract the hash prefix from a line if it has a HashPos prefix.
-
- Args:
- line: A line of text that may contain a HashPos prefix
-
- Returns:
- The hash prefix (4 characters) if found, otherwise empty string
"""
match = HashPos.HASH_PREFIX_RE.match(line)
if match:
@@ -192,25 +195,11 @@ def extract_prefix(line: str) -> str:
@staticmethod
def normalize(hashpos_str: str) -> str:
"""
- Normalize a HashPos string to the 4-character hash fragment.
-
- Accepts HashPos strings in "{hash_prefix}::" format or a raw "{hash_prefix}" fragment.
- Also extracts HashPos from strings that contain content after the HashPos,
- e.g., "H7M5::Line 1"
-
- Args:
- hashpos_str: HashPos string in various formats
-
- Returns:
- str: The 4-character hash fragment
-
- Raises:
- ValueError: If format is invalid
+ Normalize a HashPos string to the 3-character hash fragment.
"""
if hashpos_str is None:
raise ValueError("HashPos string cannot be None")
- # Check if it's already a raw fragment
if HashPos.FRAGMENT_RE.match(hashpos_str):
return hashpos_str
@@ -218,9 +207,7 @@ def normalize(hashpos_str: str) -> str:
if match:
return match.group(1)
- # If no pattern matches, raise error
raise ValueError(
f"Invalid HashPos format '{hashpos_str}'. "
- r"Expected \"{content ID}\" "
- r"where content ID is exactly 4 characters from the set [0-9a-zA-Z\~_@]."
+ r"Expected a 3-character string from the Base256 character set."
)
diff --git a/cecli/helpers/responses.py b/cecli/helpers/responses.py
index 0bc9cecf909..8a77db27bb4 100644
--- a/cecli/helpers/responses.py
+++ b/cecli/helpers/responses.py
@@ -1,14 +1,18 @@
+from __future__ import annotations
+
import json
import re
import time
-from typing import List, Optional
+from typing import TYPE_CHECKING, List, Optional
import json_repair
-from litellm.types.utils import ChatCompletionMessageToolCall, Function
from cecli import utils
from cecli.helpers import nested
+if TYPE_CHECKING:
+ from litellm.types.utils import ChatCompletionMessageToolCall, Function # noqa
+
def preprocess_json(response: str) -> str:
# This pattern matches any sequence of backslashes followed by
@@ -35,6 +39,8 @@ def extract_tools_from_content_json(content: str) -> Optional[List[ChatCompletio
Simple extraction of JSON-like structures that look like tool calls.
This handles models that write JSON in text instead of using native calling.
"""
+ from litellm.types.utils import ChatCompletionMessageToolCall, Function # noqa
+
if not content or ("{" not in content and "[" not in content):
return None
@@ -111,6 +117,8 @@ def extract_tools_from_content_xml(content: str) -> Optional[List[ChatCompletion
"""
+ from litellm.types.utils import ChatCompletionMessageToolCall, Function # noqa
+
if not content or (" Optional[List[ChatCompletion
Example:
[Local--ReadRange(show=[{"file_path": "agent.py", "start_text": "class A"}], verbose=true, mode="strict")]
"""
+ from litellm.types.utils import ChatCompletionMessageToolCall, Function # noqa
+
if not content or "[" not in content:
return None
@@ -278,6 +288,8 @@ def prefix_tool_call(tool_call, server_name: str):
Returns:
New tool call with prefixed function name (same type as input)
"""
+ from litellm.types.utils import ChatCompletionMessageToolCall, Function # noqa
+
# Handle ChatCompletionMessageToolCall objects
if hasattr(tool_call, "function") and hasattr(tool_call.function, "name"):
# Create a copy of the tool call object
@@ -316,6 +328,8 @@ def unprefix_tool_call(tool_call):
Tuple of (server_name, unprefixed_tool_call) where server_name may be empty string
if no prefix is found (same type as input)
"""
+ from litellm.types.utils import ChatCompletionMessageToolCall, Function # noqa
+
# Handle ChatCompletionMessageToolCall objects
if hasattr(tool_call, "function") and hasattr(tool_call.function, "name"):
server_name, unprefixed_name = unprefix_tool_name(tool_call.function.name)
diff --git a/cecli/linter.py b/cecli/linter.py
index 434724e2bdf..45003c6b01d 100644
--- a/cecli/linter.py
+++ b/cecli/linter.py
@@ -10,8 +10,6 @@
import oslex
from cecli.dump import dump # noqa: F401
-from cecli.helpers.grep_ast import TreeContext, filename_to_lang
-from cecli.helpers.grep_ast.tsl import get_parser # noqa: E402
from cecli.helpers.threading import ThreadSafeEvent
from cecli.run_cmd import run_cmd_async, run_cmd_subprocess # noqa: F401
@@ -80,6 +78,8 @@ def errors_to_lint_result(self, rel_fname, errors):
return LintResult(text=errors, lines=linenums)
async def lint(self, fname, cmd=None):
+ from cecli.helpers.grep_ast import filename_to_lang
+
rel_fname = self.get_rel_fname(fname)
try:
code = Path(fname).read_text(encoding=self.encoding, errors="replace")
@@ -204,6 +204,9 @@ def basic_lint(fname, code):
Use tree-sitter to look for syntax errors, display them with tree context.
"""
+ from cecli.helpers.grep_ast import filename_to_lang
+ from cecli.helpers.grep_ast.tsl import get_parser
+
lang = filename_to_lang(fname)
if not lang:
return
@@ -233,6 +236,8 @@ def basic_lint(fname, code):
def tree_context(fname, code, line_nums):
+ from cecli.helpers.grep_ast import TreeContext
+
context = TreeContext(
fname,
code,
diff --git a/cecli/llm.py b/cecli/llm.py
index e177854c9fd..84add1f6ac2 100644
--- a/cecli/llm.py
+++ b/cecli/llm.py
@@ -16,6 +16,7 @@
os.environ["OR_SITE_URL"] = SITE_URL
os.environ["OR_APP_NAME"] = APP_NAME
os.environ["LITELLM_MODE"] = "PRODUCTION"
+# litellm documents LITELLM_LOCAL_MODEL_COST_MAP="True" as the switch that makes it
+# use its bundled model cost map instead of downloading one; the shorter name
+# LITELLM_LOCAL_MODEL_COST is not read by litellm (NOTE(review): confirm against
+# the pinned litellm version).
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
# `import litellm` takes 1.5 seconds, defer it!
diff --git a/cecli/main.py b/cecli/main.py
index 65913f886be..6cb779e7bca 100644
--- a/cecli/main.py
+++ b/cecli/main.py
@@ -8,8 +8,6 @@
except Exception:
pass
-from cecli.helpers.file_searcher import handle_core_files
-
try:
if not os.getenv("CECLI_DEFAULT_TLS"):
import truststore
@@ -44,28 +42,6 @@
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
from prompt_toolkit.enums import EditingMode
-from cecli import __version__, models, urls, utils
-from cecli.args import get_parser
-from cecli.coders import AgentCoder, Coder
-from cecli.coders.base_coder import UnknownEditFormat
-from cecli.commands import Commands, ReloadProgramSignal, SwitchCoderSignal
-from cecli.deprecated_args import handle_deprecated_model_args
-from cecli.format_settings import format_settings, scrub_sensitive_info
-from cecli.helpers.conversation import ConversationService, MessageTag
-from cecli.helpers.copypaste import ClipboardWatcher
-from cecli.helpers.file_searcher import generate_search_path_list
-from cecli.history import ChatSummary
-from cecli.hooks import HookService
-from cecli.io import InputOutput
-from cecli.llm import litellm
-from cecli.mcp import McpServerManager, load_mcp_servers
-from cecli.models import ModelSettings
-from cecli.onboarding import offer_openrouter_oauth, select_default_model
-from cecli.repo import ANY_GIT_ERROR, GitRepo
-from cecli.report import report_uncaught_exceptions, set_args_error_data
-from cecli.versioncheck import check_version
-from cecli.watch import FileWatcher
-
from .dump import dump # noqa
@@ -129,6 +105,8 @@ def get_git_root():
def guessed_wrong_repo(io, git_root, fnames, git_dname):
"""After we parse the args, we can determine the real repo. Did we guess wrong?"""
+ from cecli.repo import ANY_GIT_ERROR, GitRepo
+
try:
check_repo = Path(GitRepo(io, fnames, git_dname).root).resolve()
except (OSError,) + ANY_GIT_ERROR:
@@ -157,6 +135,8 @@ def validate_tui_args(args):
async def make_new_repo(git_root, io):
+ from cecli.repo import ANY_GIT_ERROR
+
try:
repo = git.Repo.init(git_root)
await check_gitignore(git_root, io, False)
@@ -169,6 +149,8 @@ async def make_new_repo(git_root, io):
async def setup_git(git_root, io):
+ from cecli.repo import ANY_GIT_ERROR
+
if git is None:
return
try:
@@ -216,6 +198,8 @@ async def setup_git(git_root, io):
async def check_gitignore(git_root, io, ask=True):
+ from cecli.repo import ANY_GIT_ERROR
+
if not git_root:
return
try:
@@ -288,6 +272,9 @@ def parse_lint_cmds(lint_cmds, io):
def register_models(git_root, model_settings_fname, io, verbose=False):
+ from cecli import models
+ from cecli.helpers.file_searcher import generate_search_path_list
+
model_settings_files = generate_search_path_list(
".cecli.model.settings.yml", git_root, model_settings_fname
)
@@ -317,6 +304,8 @@ def register_models(git_root, model_settings_fname, io, verbose=False):
def load_dotenv_files(git_root, dotenv_fname, encoding="utf-8"):
+ from cecli.helpers.file_searcher import generate_search_path_list, handle_core_files
+
dotenv_files = generate_search_path_list(".env", git_root, dotenv_fname)
oauth_keys_file = handle_core_files(Path.home() / ".cecli" / "oauth-keys.env")
if oauth_keys_file.exists():
@@ -336,6 +325,9 @@ def load_dotenv_files(git_root, dotenv_fname, encoding="utf-8"):
def register_litellm_models(git_root, model_metadata_fname, io, verbose=False):
+ from cecli import models
+ from cecli.helpers.file_searcher import generate_search_path_list
+
model_metadata_files = []
resource_metadata = importlib_resources.files("cecli.resources").joinpath("model-metadata.json")
model_metadata_files.append(str(resource_metadata))
@@ -361,15 +353,22 @@ def register_litellm_models(git_root, model_metadata_fname, io, verbose=False):
def load_model_overrides(git_root, model_overrides_fname, io, verbose=False):
"""Load model tag overrides from a YAML file."""
+ from cecli import models
+
models.ModelOverrides.load_from_file(git_root, model_overrides_fname, io, verbose=verbose)
def load_model_overrides_from_string(model_overrides_str, io):
"""Load model tag overrides from a JSON/YAML string."""
+ from cecli import models
+
models.ModelOverrides.load_from_string(model_overrides_str, io)
async def sanity_check_repo(repo, io):
+ from cecli import urls
+ from cecli.repo import ANY_GIT_ERROR
+
if not repo:
return True
if not repo.repo.working_tree_dir:
@@ -471,6 +470,9 @@ def custom_tracer(frame, event, arg):
def main(argv=None, input=None, output=None, force_git_root=None, return_coder=False):
+ from cecli.commands import ReloadProgramSignal
+ from cecli.hooks import HookService
+
# Tracks the coder instance from a ReloadProgramSignal so the new
# main_async() can pass it as from_coder to Coder.create(), preserving
# UUID, edit_format, and other state across the reload cycle.
@@ -519,6 +521,28 @@ async def main_async(
return_coder=False,
from_coder=None,
):
+ from cecli import models, urls, utils
+ from cecli.args import get_parser
+ from cecli.coders import Coder
+ from cecli.coders.base_coder import UnknownEditFormat
+ from cecli.commands import Commands, ReloadProgramSignal, SwitchCoderSignal
+ from cecli.deprecated_args import handle_deprecated_model_args
+ from cecli.format_settings import format_settings, scrub_sensitive_info
+ from cecli.helpers.conversation import ConversationService, MessageTag
+ from cecli.helpers.copypaste import ClipboardWatcher
+ from cecli.helpers.file_searcher import handle_core_files
+ from cecli.history import ChatSummary
+ from cecli.hooks import HookService
+ from cecli.io import InputOutput
+ from cecli.llm import litellm
+ from cecli.mcp import McpServerManager, load_mcp_servers
+ from cecli.models import ModelSettings
+ from cecli.onboarding import offer_openrouter_oauth, select_default_model
+ from cecli.repo import GitRepo
+ from cecli.report import report_uncaught_exceptions, set_args_error_data
+ from cecli.versioncheck import check_version
+ from cecli.watch import FileWatcher
+
report_uncaught_exceptions()
if argv is None:
argv = sys.argv[1:]
@@ -1319,7 +1343,10 @@ def get_io(pretty):
old_coder = coder
coder = await Coder.create(**kwargs)
- if isinstance(old_coder, AgentCoder) and not isinstance(coder, AgentCoder):
+ if old_coder.edit_format in ("agent", "subagent") and coder.edit_format not in (
+ "agent",
+ "subagent",
+ ):
if coder.mcp_manager and coder.mcp_manager.get_server("Local"):
await coder.mcp_manager.disconnect_server("Local")
@@ -1344,6 +1371,9 @@ def get_io(pretty):
def is_first_run_of_new_version(io, verbose=False):
"""Check if this is the first run of a new version/executable combination"""
+ from cecli import __version__
+ from cecli.helpers.file_searcher import handle_core_files
+
installs_file = handle_core_files(Path.home() / ".cecli" / "installs.json")
key = __version__, sys.executable
if ".dev" in __version__:
@@ -1378,6 +1408,8 @@ def is_first_run_of_new_version(io, verbose=False):
async def check_and_load_imports(io, is_first_run, verbose=False):
+ from cecli import urls
+
try:
if is_first_run:
if verbose:
diff --git a/cecli/mcp/manager.py b/cecli/mcp/manager.py
index ad50dc69bd3..4ee86d2496f 100644
--- a/cecli/mcp/manager.py
+++ b/cecli/mcp/manager.py
@@ -1,6 +1,5 @@
import asyncio
-from cecli.llm import litellm
from cecli.mcp.server import LocalServer, McpServer
from cecli.tools.utils.registry import ToolRegistry
@@ -112,6 +111,8 @@ async def connect_server(self, name: str) -> bool:
Returns:
Boolean indicating success or failure
"""
+ from litellm import experimental_mcp_client
+
server = self.get_server(name)
if not server:
self._log_warning(f"MCP server not found: {name}")
@@ -131,9 +132,7 @@ async def connect_server(self, name: str) -> bool:
try:
session = await server.connect()
- tools = await litellm.experimental_mcp_client.load_mcp_tools(
- session=session, format="openai"
- )
+ tools = await experimental_mcp_client.load_mcp_tools(session=session, format="openai")
self._server_tools[server.name] = tools
self._connected_servers.add(server)
self._log_verbose(f"Connected to MCP server: {name}")
diff --git a/cecli/models.py b/cecli/models.py
index 190757a4dc4..3f593cb7b75 100644
--- a/cecli/models.py
+++ b/cecli/models.py
@@ -148,6 +148,7 @@ def __init__(self):
self.cache_dir = handle_core_files(Path.home() / ".cecli" / "caches")
self.cache_file = self.cache_dir / "model_prices_and_context_window.json"
self.content = None
+ self._raw_content = None
self.local_model_metadata = {}
self.verify_ssl = True
self._cache_loaded = False
@@ -167,9 +168,9 @@ def _load_cache(self):
cache_age = time.time() - self.cache_file.stat().st_mtime
if cache_age < self.CACHE_TTL:
try:
- self.content = json.loads(self.cache_file.read_text())
+ self._raw_content = self.cache_file.read_text()
except json.JSONDecodeError:
- self.content = None
+ self._raw_content = None
except OSError:
pass
self._cache_loaded = True
@@ -180,9 +181,13 @@ def _update_cache(self):
response = requests.get(self.MODEL_INFO_URL, timeout=5, verify=self.verify_ssl)
if response.status_code == 200:
- self.content = response.json()
+ # Use json.dumps(response.json()) instead of response.text for
+ # compatibility with mocked responses in tests
+ parsed = response.json()
+ self._raw_content = json.dumps(parsed)
try:
- self.cache_file.write_text(json.dumps(self.content, indent=4))
+ parsed = response.json()
+ self.cache_file.write_text(json.dumps(parsed, indent=4))
except OSError:
pass
except Exception as ex:
@@ -192,21 +197,59 @@ def _update_cache(self):
except OSError:
pass
+ def _get_entry_from_raw(self, key):
+ """Parse a single model entry from raw JSON string without loading the entire dict."""
+ if not self._raw_content:
+ return None
+ import re
+
+ escaped_key = re.escape(key)
+ pattern = rf'(? 0:
+ ch = self._raw_content[pos]
+ if escape:
+ escape = False
+ elif ch == "\\":
+ escape = True
+ elif ch == '"':
+ in_string = not in_string
+ elif not in_string:
+ if ch == "{":
+ depth += 1
+ elif ch == "}":
+ depth -= 1
+ pos += 1
+ if depth == 0:
+ entry_str = self._raw_content[start:pos]
+ return json.loads(entry_str)
+ return None
+
def get_model_from_cached_json_db(self, model):
data = self.local_model_metadata.get(model)
if data:
return data
self._load_cache()
- if not self.content:
+ if not self._raw_content:
self._update_cache()
- if not self.content:
+ if not self._raw_content:
return dict()
- info = self.content.get(model, dict())
+ info = self._get_entry_from_raw(model)
if info:
return info
pieces = model.split("/")
if len(pieces) == 2:
- info = self.content.get(pieces[1])
+ info = self._get_entry_from_raw(pieces[1])
if info and info.get("litellm_provider") == pieces[0]:
return info
return dict()
diff --git a/cecli/prompts/agent.yml b/cecli/prompts/agent.yml
index 4143d42d18e..ec1473e3f41 100644
--- a/cecli/prompts/agent.yml
+++ b/cecli/prompts/agent.yml
@@ -27,21 +27,22 @@ main_system: |
## FILE FORMAT
File contents will be prefixed with identifiers. Each line starts with a case-sensitive content ID followed by `::`. These are used to target where editing tools will perform edits.
- They are algorithmically generated, maintained, and subject to change. Do not search for these content IDs. Focus on the lines they identify.
+ They are generated and maintained by a custom algorithm and subject to change on edits. Do not search for these content IDs directly. You will not be able to generate them.
+ Focus on the lines they identify.
**Example File**
```
- il9n::#!/usr/bin/env python3
- faoZ::
- uXdn::def example_method():
- WAR5:: return "example"
- vwkS::
+ 事tN::#!/usr/bin/env python3
+ 看XX::
+ 法然t::def example_method():
+ ä个8:: return "example"
+ 都ъñ::
```
## Core Workflow
1. **Plan**: Start by using `UpdateTodoList` to outline the task.
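- 2. **Explore**: Use discovery tools (`ExploreCode`, `Grep`, `Ls`) to research and gather understanding for you task. Modify search terms when errors are encountered.
+ 2. **Explore**: Use discovery tools (`ExploreCode`, `Grep`, `Ls`) to research and gather understanding for your task. Modify search terms when errors are encountered.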
- 3. **Execute**: Mark files as editable with `ContextManager` before attempting edits. Proactively use skills if they are available. Review diff outputs after edit to ensure the proper changes were made.
+ 3. **Execute**: Mark files as editable with `ResourceManager` before attempting edits. Proactively use skills if they are available. Review diff outputs after edit to ensure the proper changes were made.
4. **Verify & Recover**: If an edit fails or introduces linting errors, fix the error immediately. Use `UndoChange` if the errors are too complex to incrementally modify.
5. **Yield**: Use the `Yield` tool after accomplishing the goal and verifying any changes made. Provide helpful summaries of any changes.
@@ -51,7 +52,7 @@ main_system: |
## Operational Rules
- **Scope**: No unrequested refactors. Avoid full-file rewrites. Only modify what you are asked to.
- - **Hygiene**: Use `ContextManager`/`RemoveSkill` to evict unneeded files/skills immediately after use.
+ - **Hygiene**: Use `ResourceManager` to evict unneeded files/skills immediately after use.
- **Outputs**: Tool calls trigger turns. Never include tool syntax in final user summaries.
- **Sandbox**: Perform all verification and temp logic in `.cecli/temp`.
- **Responses**: Reason out loud through the problem but be brief.
@@ -70,7 +71,7 @@ system_reminder: |
{lazy_prompt}
{shell_cmd_reminder}
- ""
+
try_again: |
My previous exploration was insufficient. I will now adjust my strategy, use more specific search patterns, and manage my context more aggressively to find the correct solution.
\ No newline at end of file
diff --git a/cecli/prompts/subagent.yml b/cecli/prompts/subagent.yml
index e10c1935e1d..6e3867a9852 100644
--- a/cecli/prompts/subagent.yml
+++ b/cecli/prompts/subagent.yml
@@ -12,21 +12,22 @@ main_system: |
## FILE FORMAT
File contents will be prefixed with identifiers. Each line starts with a case-sensitive content ID followed by `::`. These are used to target where editing tools will perform edits.
- They are algorithmically generated, maintained, and subject to change. Do not search for these content IDs. Focus on the lines they identify.
+ They are generated and maintained by a custom algorithm and subject to change on edits. Do not search for these content IDs directly. You will not be able to generate them.
+ Focus on the lines they identify.
**Example File**
```
- il9n::#!/usr/bin/env python3
- faoZ::
- uXdn::def example_method():
- WAR5:: return "example"
- vwkS::
+ 事tN::#!/usr/bin/env python3
+ 看XX::
+ 法然t::def example_method():
+ ä个8:: return "example"
+ 都ъñ::
```
## Core Workflow
1. **Plan**: Start by using `UpdateTodoList` to outline the task.
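- 2. **Explore**: Use discovery tools (`ExploreCode`, `Grep`, `Ls`) to research and gather understanding for you task. Modify search terms when errors are encountered.
+ 2. **Explore**: Use discovery tools (`ExploreCode`, `Grep`, `Ls`) to research and gather understanding for your task. Modify search terms when errors are encountered.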
- 3. **Execute**: Mark files as editable with `ContextManager` before attempting edits. Proactively use skills if they are available. Review diff outputs after edit to ensure the proper changes were made.
+ 3. **Execute**: Mark files as editable with `ResourceManager` before attempting edits. Proactively use skills if they are available. Review diff outputs after edit to ensure the proper changes were made.
4. **Verify & Recover**: If an edit fails or introduces linting errors, fix the error immediately. Use `UndoChange` if the errors are too complex to incrementally modify.
5. **Yield**: Use the `Yield` tool after accomplishing the goal and verifying any changes made. Provide helpful summaries of any changes.
@@ -36,7 +37,7 @@ main_system: |
## Operational Rules
- **Scope**: No unrequested refactors. Avoid full-file rewrites. Only modify what you are asked to.
- - **Hygiene**: Use `ContextManager`/`RemoveSkill` to evict unneeded files/skills immediately after use.
+ - **Hygiene**: Use `ResourceManager` to evict unneeded files/skills immediately after use.
- **Outputs**: Tool calls trigger turns. Never include tool syntax in final user summaries.
- **Sandbox**: Perform all verification and temp logic in `.cecli/temp`.
- **Responses**: Reason out loud through the problem but be brief.
diff --git a/cecli/repomap.py b/cecli/repomap.py
index 1f721ef0ca3..dbb9739d5c3 100644
--- a/cecli/repomap.py
+++ b/cecli/repomap.py
@@ -10,10 +10,7 @@
from importlib import resources
from pathlib import Path
-import tree_sitter
from diskcache import Cache
-from pygments.lexers import guess_lexer_for_filename
-from pygments.token import Token
from cecli.dump import dump
from cecli.helpers.similarity import (
@@ -22,18 +19,9 @@
normalize_vector,
)
from cecli.special import filter_important_files
-from cecli.tools.utils.helpers import ToolError
-# tree_sitter is throwing a FutureWarning
warnings.simplefilter("ignore", category=FutureWarning)
-from cecli.helpers.grep_ast import TreeContext, filename_to_lang # noqa: E402
-from cecli.helpers.grep_ast.tsl import ( # noqa: E402
- USING_TSL_PACK,
- get_language,
- get_parser,
-)
-
# Define the Tag namedtuple with a default for specific_kind to maintain compatibility
# with cached entries that might have been created with the old definition
@@ -117,9 +105,11 @@ def __new__(
SQLITE_ERRORS = (sqlite3.OperationalError, sqlite3.DatabaseError, OSError)
-CACHE_VERSION = 7
-if USING_TSL_PACK:
- CACHE_VERSION = 9
+# CACHE_VERSION determines tags cache format.
+# Set to 9 to ensure fresh cache when tree-sitter features are available.
+# tree_sitter is loaded lazily inside get_tags_raw(), so we don't
+# import it at module level here.
+CACHE_VERSION = 9
UPDATING_REPO_MAP_MESSAGE = "Updating repo map"
@@ -517,6 +507,8 @@ def get_symbol_definition_location(self, file_path, symbol_name):
Raises:
ToolError: If the symbol is not found, not unique, or not a definition.
"""
+ from cecli.tools.utils.helpers import ToolError
+
abs_path = self.io.root_abs_path(file_path) # Assuming io has this helper or similar
rel_path = self.get_rel_fname(abs_path) # Ensure we use consistent relative path
@@ -605,6 +597,13 @@ def check_import_match(self, definer, imports):
return False
def get_tags_raw(self, fname, rel_fname):
+ import tree_sitter
+ from pygments.lexers import guess_lexer_for_filename
+ from pygments.token import Token
+
+ from cecli.helpers.grep_ast import filename_to_lang
+ from cecli.helpers.grep_ast.tsl import USING_TSL_PACK, get_language, get_parser
+
lang = filename_to_lang(fname)
if not lang:
return
@@ -1279,6 +1278,8 @@ def get_ranked_tags_map_uncached(
def render_tree(
self, abs_fname, rel_fname, lois, line_numbers=False, start_line=None, end_line=None
):
+ from cecli.helpers.grep_ast import TreeContext
+
mtime = self.get_mtime(abs_fname)
key = (rel_fname, tuple(sorted(lois)), mtime, start_line, end_line)
@@ -1444,6 +1445,8 @@ def find_src_files(directory):
def get_scm_fname(lang):
+ from cecli.helpers.grep_ast.tsl import USING_TSL_PACK
+
# Load the tags queries
if USING_TSL_PACK:
subdir = "tree-sitter-language-pack"
diff --git a/cecli/resources/model-metadata.json b/cecli/resources/model-metadata.json
index 8aee168a3e5..4af6e04ffc7 100644
--- a/cecli/resources/model-metadata.json
+++ b/cecli/resources/model-metadata.json
@@ -353,6 +353,39 @@
"cache_read_input_token_cost": 3e-7,
"cache_creation_input_token_cost": 0.00000375
},
+ "anthropic.claude-fable-5": {
+ "cache_creation_input_token_cost": 0.0000125,
+ "cache_creation_input_token_cost_above_1hr": 0.00002,
+ "cache_read_input_token_cost": 0.000001,
+ "input_cost_per_token": 0.00001,
+ "litellm_provider": "bedrock_converse",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.00005,
+ "search_context_cost_per_query": {
+ "search_context_size_high": 0.01,
+ "search_context_size_low": 0.01,
+ "search_context_size_medium": 0.01
+ },
+ "supports_adaptive_thinking": true,
+ "supports_assistant_prefill": false,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_sampling_params": false,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "supports_xhigh_reasoning_effort": true,
+ "supports_native_structured_output": true,
+ "supports_max_reasoning_effort": true,
+ "supports_output_config": true,
+ "bedrock_output_config_effort_ceiling": "xhigh"
+ },
"anthropic.claude-haiku-4-5-20251001-v1:0": {
"cache_creation_input_token_cost": 0.00000125,
"cache_creation_input_token_cost_above_1hr": 0.000002,
@@ -557,6 +590,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -581,6 +615,7 @@
"search_context_size_low": 0.01,
"search_context_size_medium": 0.01
},
+ "supports_adaptive_thinking": true,
"supports_assistant_prefill": false,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -588,6 +623,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -1019,6 +1055,7 @@
},
"au.anthropic.claude-haiku-4-5-20251001-v1:0": {
"cache_creation_input_token_cost": 0.000001375,
+ "cache_creation_input_token_cost_above_1hr": 0.0000022,
"cache_read_input_token_cost": 1.1e-7,
"input_cost_per_token": 0.0000011,
"litellm_provider": "bedrock_converse",
@@ -1040,6 +1077,7 @@
},
"au.anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 0.000006875,
+ "cache_creation_input_token_cost_above_1hr": 0.000011,
"cache_read_input_token_cost": 5.5e-7,
"input_cost_per_token": 0.0000055,
"litellm_provider": "bedrock_converse",
@@ -1069,6 +1107,7 @@
},
"au.anthropic.claude-opus-4-7": {
"cache_creation_input_token_cost": 0.000006875,
+ "cache_creation_input_token_cost_above_1hr": 0.000011,
"cache_read_input_token_cost": 5.5e-7,
"input_cost_per_token": 0.0000055,
"litellm_provider": "bedrock_converse",
@@ -1089,6 +1128,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -1113,6 +1153,7 @@
"search_context_size_low": 0.01,
"search_context_size_medium": 0.01
},
+ "supports_adaptive_thinking": true,
"supports_assistant_prefill": false,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -1120,6 +1161,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -1130,11 +1172,13 @@
},
"au.anthropic.claude-sonnet-4-5-20250929-v1:0": {
"cache_creation_input_token_cost": 0.000004125,
+ "cache_creation_input_token_cost_above_1hr": 0.0000066,
"cache_read_input_token_cost": 3.3e-7,
"input_cost_per_token": 0.0000033,
"input_cost_per_token_above_200k_tokens": 0.0000066,
"output_cost_per_token_above_200k_tokens": 0.00002475,
"cache_creation_input_token_cost_above_200k_tokens": 0.00000825,
+ "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 0.0000132,
"cache_read_input_token_cost_above_200k_tokens": 6.6e-7,
"litellm_provider": "bedrock_converse",
"max_input_tokens": 200000,
@@ -1160,6 +1204,7 @@
},
"au.anthropic.claude-sonnet-4-6": {
"cache_creation_input_token_cost": 0.000004125,
+ "cache_creation_input_token_cost_above_1hr": 0.0000066,
"cache_read_input_token_cost": 3.3e-7,
"input_cost_per_token": 0.0000033,
"litellm_provider": "bedrock_converse",
@@ -4531,6 +4576,36 @@
"supports_tool_choice": true,
"supports_reasoning": true
},
+ "azure_ai/claude-fable-5": {
+ "input_cost_per_token": 0.00001,
+ "output_cost_per_token": 0.00005,
+ "litellm_provider": "azure_ai",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "search_context_cost_per_query": {
+ "search_context_size_high": 0.01,
+ "search_context_size_low": 0.01,
+ "search_context_size_medium": 0.01
+ },
+ "cache_creation_input_token_cost": 0.0000125,
+ "cache_creation_input_token_cost_above_1hr": 0.00002,
+ "cache_read_input_token_cost": 0.000001,
+ "supports_adaptive_thinking": true,
+ "supports_assistant_prefill": false,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_sampling_params": false,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "supports_xhigh_reasoning_effort": true,
+ "supports_max_reasoning_effort": true
+ },
"azure_ai/claude-haiku-4-5": {
"cache_creation_input_token_cost": 0.00000125,
"cache_creation_input_token_cost_above_1hr": 0.000002,
@@ -4646,6 +4721,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -4667,6 +4743,7 @@
"cache_creation_input_token_cost": 0.00000625,
"cache_creation_input_token_cost_above_1hr": 0.00001,
"cache_read_input_token_cost": 5e-7,
+ "supports_adaptive_thinking": true,
"supports_assistant_prefill": false,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -4674,6 +4751,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -4758,6 +4836,19 @@
"supports_function_calling": true,
"supports_tool_choice": true
},
+ "azure_ai/deepseek-v3.1": {
+ "input_cost_per_token": 0.00000123,
+ "litellm_provider": "azure_ai",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
+ "mode": "chat",
+ "output_cost_per_token": 0.00000494,
+ "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/deepseek/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_tool_choice": true
+ },
"azure_ai/deepseek-v3.2": {
"input_cost_per_token": 5.8e-7,
"litellm_provider": "azure_ai",
@@ -4788,6 +4879,32 @@
"supports_reasoning": true,
"supports_tool_choice": true
},
+ "azure_ai/deepseek-v4-flash": {
+ "input_cost_per_token": 1.9e-7,
+ "litellm_provider": "azure_ai",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 384000,
+ "max_tokens": 384000,
+ "mode": "chat",
+ "output_cost_per_token": 5.1e-7,
+ "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/deepseek/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_tool_choice": true
+ },
+ "azure_ai/deepseek-v4-pro": {
+ "input_cost_per_token": 0.00000174,
+ "litellm_provider": "azure_ai",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 384000,
+ "max_tokens": 384000,
+ "mode": "chat",
+ "output_cost_per_token": 0.00000348,
+ "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/deepseek/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_tool_choice": true
+ },
"azure_ai/global/grok-3": {
"input_cost_per_token": 0.000003,
"litellm_provider": "azure_ai",
@@ -5099,6 +5216,100 @@
"supports_xhigh_reasoning_effort": true,
"supports_minimal_reasoning_effort": false
},
+ "azure_ai/gpt-5.5": {
+ "cache_read_input_token_cost": 5e-7,
+ "cache_read_input_token_cost_above_272k_tokens": 0.000001,
+ "cache_read_input_token_cost_priority": 0.000001,
+ "cache_read_input_token_cost_above_272k_tokens_priority": 0.000002,
+ "input_cost_per_token": 0.000005,
+ "input_cost_per_token_above_272k_tokens": 0.00001,
+ "input_cost_per_token_priority": 0.00001,
+ "input_cost_per_token_above_272k_tokens_priority": 0.00002,
+ "litellm_provider": "azure_ai",
+ "max_input_tokens": 1050000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.00003,
+ "output_cost_per_token_above_272k_tokens": 0.000045,
+ "output_cost_per_token_priority": 0.00006,
+ "output_cost_per_token_above_272k_tokens_priority": 0.00009,
+ "source": "https://ai.azure.com/catalog/models/gpt-5.5",
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/batch",
+ "/v1/responses"
+ ],
+ "supported_modalities": [
+ "text",
+ "image"
+ ],
+ "supported_output_modalities": [
+ "text"
+ ],
+ "supports_function_calling": true,
+ "supports_native_streaming": true,
+ "supports_parallel_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_service_tier": true,
+ "supports_vision": true,
+ "supports_web_search": true,
+ "supports_none_reasoning_effort": true,
+ "supports_xhigh_reasoning_effort": true,
+ "supports_minimal_reasoning_effort": false
+ },
+ "azure_ai/gpt-5.5-2026-04-23": {
+ "cache_read_input_token_cost": 5e-7,
+ "cache_read_input_token_cost_above_272k_tokens": 0.000001,
+ "cache_read_input_token_cost_priority": 0.000001,
+ "cache_read_input_token_cost_above_272k_tokens_priority": 0.000002,
+ "input_cost_per_token": 0.000005,
+ "input_cost_per_token_above_272k_tokens": 0.00001,
+ "input_cost_per_token_priority": 0.00001,
+ "input_cost_per_token_above_272k_tokens_priority": 0.00002,
+ "litellm_provider": "azure_ai",
+ "max_input_tokens": 1050000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.00003,
+ "output_cost_per_token_above_272k_tokens": 0.000045,
+ "output_cost_per_token_priority": 0.00006,
+ "output_cost_per_token_above_272k_tokens_priority": 0.00009,
+ "source": "https://ai.azure.com/catalog/models/gpt-5.5",
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/batch",
+ "/v1/responses"
+ ],
+ "supported_modalities": [
+ "text",
+ "image"
+ ],
+ "supported_output_modalities": [
+ "text"
+ ],
+ "supports_function_calling": true,
+ "supports_native_streaming": true,
+ "supports_parallel_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_service_tier": true,
+ "supports_vision": true,
+ "supports_web_search": true,
+ "supports_none_reasoning_effort": true,
+ "supports_xhigh_reasoning_effort": true,
+ "supports_minimal_reasoning_effort": false
+ },
"azure_ai/gpt-oss-120b": {
"input_cost_per_token": 1.5e-7,
"output_cost_per_token": 6e-7,
@@ -5260,6 +5471,27 @@
"supports_video_input": true,
"supports_vision": true
},
+ "azure_ai/kimi-k2.6": {
+ "input_cost_per_token": 9.5e-7,
+ "litellm_provider": "azure_ai",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
+ "output_cost_per_token": 0.000004,
+ "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/introducing-kimi-k2-6-in-microsoft-foundry/4513125",
+ "supported_modalities": [
+ "text",
+ "image"
+ ],
+ "supported_output_modalities": [
+ "text"
+ ],
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
"azure_ai/ministral-3b": {
"input_cost_per_token": 4e-8,
"litellm_provider": "azure_ai",
@@ -6729,6 +6961,7 @@
},
"bedrock/us-gov-east-1/anthropic.claude-haiku-4-5-20251001-v1:0": {
"cache_creation_input_token_cost": 0.0000015,
+ "cache_creation_input_token_cost_above_1hr": 0.0000024,
"cache_read_input_token_cost": 1.2e-7,
"input_cost_per_token": 0.0000012,
"litellm_provider": "bedrock",
@@ -6750,15 +6983,16 @@
"supports_pdf_input": true
},
"bedrock/us-gov-east-1/anthropic.claude-sonnet-4-5-20250929-v1:0": {
- "cache_creation_input_token_cost": 0.000004125,
- "cache_read_input_token_cost": 3.3e-7,
- "input_cost_per_token": 0.0000033,
+ "cache_creation_input_token_cost": 0.0000045,
+ "cache_creation_input_token_cost_above_1hr": 0.0000072,
+ "cache_read_input_token_cost": 3.6e-7,
+ "input_cost_per_token": 0.0000036,
"litellm_provider": "bedrock",
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"max_tokens": 8192,
"mode": "chat",
- "output_cost_per_token": 0.0000165,
+ "output_cost_per_token": 0.000018,
"supports_assistant_prefill": true,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -6771,15 +7005,16 @@
"supports_native_structured_output": true
},
"bedrock/us-gov-east-1/claude-sonnet-4-5-20250929-v1:0": {
- "cache_creation_input_token_cost": 0.000004125,
- "cache_read_input_token_cost": 3.3e-7,
- "input_cost_per_token": 0.0000033,
+ "cache_creation_input_token_cost": 0.0000045,
+ "cache_creation_input_token_cost_above_1hr": 0.0000072,
+ "cache_read_input_token_cost": 3.6e-7,
+ "input_cost_per_token": 0.0000036,
"litellm_provider": "bedrock",
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"max_tokens": 8192,
"mode": "chat",
- "output_cost_per_token": 0.0000165,
+ "output_cost_per_token": 0.000018,
"supports_assistant_prefill": true,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -6906,6 +7141,7 @@
},
"bedrock/us-gov-west-1/anthropic.claude-haiku-4-5-20251001-v1:0": {
"cache_creation_input_token_cost": 0.0000015,
+ "cache_creation_input_token_cost_above_1hr": 0.0000024,
"cache_read_input_token_cost": 1.2e-7,
"input_cost_per_token": 0.0000012,
"litellm_provider": "bedrock",
@@ -6927,15 +7163,16 @@
"supports_pdf_input": true
},
"bedrock/us-gov-west-1/anthropic.claude-sonnet-4-5-20250929-v1:0": {
- "cache_creation_input_token_cost": 0.000004125,
- "cache_read_input_token_cost": 3.3e-7,
- "input_cost_per_token": 0.0000033,
+ "cache_creation_input_token_cost": 0.0000045,
+ "cache_creation_input_token_cost_above_1hr": 0.0000072,
+ "cache_read_input_token_cost": 3.6e-7,
+ "input_cost_per_token": 0.0000036,
"litellm_provider": "bedrock",
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"max_tokens": 8192,
"mode": "chat",
- "output_cost_per_token": 0.0000165,
+ "output_cost_per_token": 0.000018,
"supports_assistant_prefill": true,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -6948,15 +7185,16 @@
"supports_native_structured_output": true
},
"bedrock/us-gov-west-1/claude-sonnet-4-5-20250929-v1:0": {
- "cache_creation_input_token_cost": 0.000004125,
- "cache_read_input_token_cost": 3.3e-7,
- "input_cost_per_token": 0.0000033,
+ "cache_creation_input_token_cost": 0.0000045,
+ "cache_creation_input_token_cost_above_1hr": 0.0000072,
+ "cache_read_input_token_cost": 3.6e-7,
+ "input_cost_per_token": 0.0000036,
"litellm_provider": "bedrock",
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"max_tokens": 8192,
"mode": "chat",
- "output_cost_per_token": 0.0000165,
+ "output_cost_per_token": 0.000018,
"supports_assistant_prefill": true,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -7232,6 +7470,63 @@
"supports_response_schema": true,
"supports_tool_choice": true
},
+ "bedrock_mantle/google.gemma-4-26b-a4b": {
+ "input_cost_per_token": 1.3e-7,
+ "output_cost_per_token": 4e-7,
+ "litellm_provider": "bedrock_mantle",
+ "max_input_tokens": 256000,
+ "max_output_tokens": 256000,
+ "max_tokens": 256000,
+ "mode": "chat",
+ "use_openai_responses_path": true,
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/responses"
+ ],
+ "supports_function_calling": true,
+ "supports_parallel_function_calling": false,
+ "supports_reasoning": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "bedrock_mantle/google.gemma-4-31b": {
+ "input_cost_per_token": 1.4e-7,
+ "output_cost_per_token": 4e-7,
+ "litellm_provider": "bedrock_mantle",
+ "max_input_tokens": 256000,
+ "max_output_tokens": 256000,
+ "max_tokens": 256000,
+ "mode": "chat",
+ "use_openai_responses_path": true,
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/responses"
+ ],
+ "supports_function_calling": true,
+ "supports_parallel_function_calling": false,
+ "supports_reasoning": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "bedrock_mantle/google.gemma-4-e2b": {
+ "input_cost_per_token": 4e-8,
+ "output_cost_per_token": 8e-8,
+ "litellm_provider": "bedrock_mantle",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "use_openai_responses_path": true,
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/responses"
+ ],
+ "supports_function_calling": true,
+ "supports_parallel_function_calling": false,
+ "supports_reasoning": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
"bedrock_mantle/openai.gpt-oss-120b": {
"input_cost_per_token": 1.5e-7,
"output_cost_per_token": 6e-7,
@@ -7240,6 +7535,10 @@
"max_output_tokens": 32768,
"max_tokens": 32768,
"mode": "chat",
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/responses"
+ ],
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_reasoning": true,
@@ -7254,6 +7553,10 @@
"max_output_tokens": 32768,
"max_tokens": 32768,
"mode": "chat",
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/responses"
+ ],
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_reasoning": true,
@@ -7268,6 +7571,9 @@
"max_output_tokens": 65536,
"max_tokens": 65536,
"mode": "chat",
+ "supported_endpoints": [
+ "/v1/chat/completions"
+ ],
"supports_function_calling": true,
"supports_reasoning": true,
"supports_response_schema": true,
@@ -7281,6 +7587,9 @@
"max_output_tokens": 65536,
"max_tokens": 65536,
"mode": "chat",
+ "supported_endpoints": [
+ "/v1/chat/completions"
+ ],
"supports_function_calling": true,
"supports_reasoning": true,
"supports_response_schema": true,
@@ -7579,6 +7888,40 @@
"supports_vision": true,
"supports_web_search": true
},
+ "claude-fable-5": {
+ "cache_creation_input_token_cost": 0.0000125,
+ "cache_creation_input_token_cost_above_1hr": 0.00002,
+ "cache_read_input_token_cost": 0.000001,
+ "input_cost_per_token": 0.00001,
+ "litellm_provider": "anthropic",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.00005,
+ "search_context_cost_per_query": {
+ "search_context_size_high": 0.01,
+ "search_context_size_low": 0.01,
+ "search_context_size_medium": 0.01
+ },
+ "supports_adaptive_thinking": true,
+ "supports_assistant_prefill": false,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_sampling_params": false,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "supports_xhigh_reasoning_effort": true,
+ "supports_max_reasoning_effort": true,
+ "provider_specific_entry": {
+ "us": 1.1
+ },
+ "supports_output_config": true
+ },
"claude-haiku-4-5": {
"cache_creation_input_token_cost": 0.00000125,
"cache_creation_input_token_cost_above_1hr": 0.000002,
@@ -7845,6 +8188,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -7879,6 +8223,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -7913,6 +8258,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -7956,6 +8302,8 @@
},
"claude-sonnet-4-5": {
"cache_creation_input_token_cost": 0.00000375,
+ "cache_creation_input_token_cost_above_1hr": 0.000006,
+ "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 0.000012,
"cache_read_input_token_cost": 3e-7,
"input_cost_per_token": 0.000003,
"input_cost_per_token_above_200k_tokens": 0.000006,
@@ -7985,6 +8333,8 @@
},
"claude-sonnet-4-5-20250929": {
"cache_creation_input_token_cost": 0.00000375,
+ "cache_creation_input_token_cost_above_1hr": 0.000006,
+ "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 0.000012,
"cache_read_input_token_cost": 3e-7,
"input_cost_per_token": 0.000003,
"input_cost_per_token_above_200k_tokens": 0.000006,
@@ -8015,6 +8365,8 @@
},
"claude-sonnet-4-5-20250929-v1:0": {
"cache_creation_input_token_cost": 0.00000375,
+ "cache_creation_input_token_cost_above_1hr": 0.000006,
+ "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 0.000012,
"cache_read_input_token_cost": 3e-7,
"input_cost_per_token": 0.000003,
"input_cost_per_token_above_200k_tokens": 0.000006,
@@ -8039,6 +8391,7 @@
},
"claude-sonnet-4-6": {
"cache_creation_input_token_cost": 0.00000375,
+ "cache_creation_input_token_cost_above_1hr": 0.000006,
"cache_read_input_token_cost": 3e-7,
"input_cost_per_token": 0.000003,
"litellm_provider": "anthropic",
@@ -8231,13 +8584,13 @@
"supports_tool_choice": true
},
"command-r7b-12-2024": {
- "input_cost_per_token": 1.5e-7,
+ "input_cost_per_token": 3.75e-8,
"litellm_provider": "cohere_chat",
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"max_tokens": 4096,
"mode": "chat",
- "output_cost_per_token": 3.75e-8,
+ "output_cost_per_token": 1.5e-7,
"source": "https://docs.cohere.com/v2/docs/command-r7b",
"supports_function_calling": true,
"supports_tool_choice": true
@@ -9980,7 +10333,8 @@
"litellm_provider": "deepinfra",
"mode": "chat",
"supports_tool_choice": true,
- "supports_function_calling": true
+ "supports_function_calling": true,
+ "supports_image_size": false
},
"deepinfra/google/gemini-2.5-pro": {
"max_tokens": 1000000,
@@ -10377,6 +10731,56 @@
"supports_reasoning": true,
"supports_tool_choice": true
},
+ "deepseek-v4-flash": {
+ "cache_creation_input_token_cost": 0,
+ "cache_read_input_token_cost": 2.8e-9,
+ "input_cost_per_token": 1.4e-7,
+ "input_cost_per_token_cache_hit": 2.8e-9,
+ "litellm_provider": "deepseek",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "output_cost_per_token": 2.8e-7,
+ "source": "https://api-docs.deepseek.com/quick_start/pricing",
+ "supported_endpoints": [
+ "/v1/chat/completions"
+ ],
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_native_streaming": true,
+ "supports_parallel_function_calling": true,
+ "supports_prompt_caching": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
+ "deepseek-v4-pro": {
+ "cache_creation_input_token_cost": 0,
+ "cache_read_input_token_cost": 3.625e-9,
+ "input_cost_per_token": 4.35e-7,
+ "input_cost_per_token_cache_hit": 3.625e-9,
+ "litellm_provider": "deepseek",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "output_cost_per_token": 8.7e-7,
+ "source": "https://api-docs.deepseek.com/quick_start/pricing",
+ "supported_endpoints": [
+ "/v1/chat/completions"
+ ],
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_native_streaming": true,
+ "supports_parallel_function_calling": true,
+ "supports_prompt_caching": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
"deepseek.v3-v1:0": {
"input_cost_per_token": 5.8e-7,
"litellm_provider": "bedrock_converse",
@@ -10511,6 +10915,56 @@
"supports_reasoning": true,
"supports_tool_choice": true
},
+ "deepseek/deepseek-v4-flash": {
+ "cache_creation_input_token_cost": 0,
+ "cache_read_input_token_cost": 2.8e-9,
+ "input_cost_per_token": 1.4e-7,
+ "input_cost_per_token_cache_hit": 2.8e-9,
+ "litellm_provider": "deepseek",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "output_cost_per_token": 2.8e-7,
+ "source": "https://api-docs.deepseek.com/quick_start/pricing",
+ "supported_endpoints": [
+ "/v1/chat/completions"
+ ],
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_native_streaming": true,
+ "supports_parallel_function_calling": true,
+ "supports_prompt_caching": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
+ "deepseek/deepseek-v4-pro": {
+ "cache_creation_input_token_cost": 0,
+ "cache_read_input_token_cost": 3.625e-9,
+ "input_cost_per_token": 4.35e-7,
+ "input_cost_per_token_cache_hit": 3.625e-9,
+ "litellm_provider": "deepseek",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "output_cost_per_token": 8.7e-7,
+ "source": "https://api-docs.deepseek.com/quick_start/pricing",
+ "supported_endpoints": [
+ "/v1/chat/completions"
+ ],
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_native_streaming": true,
+ "supports_parallel_function_calling": true,
+ "supports_prompt_caching": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
"eu.amazon.nova-2-lite-v1:0": {
"cache_read_input_token_cost": 8.25e-8,
"input_cost_per_token": 3.3e-7,
@@ -10707,8 +11161,42 @@
"cache_read_input_token_cost": 3e-7,
"cache_creation_input_token_cost": 0.00000375
},
+ "eu.anthropic.claude-fable-5": {
+ "cache_creation_input_token_cost": 0.00001375,
+ "cache_creation_input_token_cost_above_1hr": 0.000022,
+ "cache_read_input_token_cost": 0.0000011,
+ "input_cost_per_token": 0.000011,
+ "litellm_provider": "bedrock_converse",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.000055,
+ "search_context_cost_per_query": {
+ "search_context_size_high": 0.01,
+ "search_context_size_low": 0.01,
+ "search_context_size_medium": 0.01
+ },
+ "supports_adaptive_thinking": true,
+ "supports_assistant_prefill": false,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_sampling_params": false,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "supports_xhigh_reasoning_effort": true,
+ "supports_native_structured_output": true,
+ "supports_max_reasoning_effort": true,
+ "supports_output_config": true,
+ "bedrock_output_config_effort_ceiling": "xhigh"
+ },
"eu.anthropic.claude-haiku-4-5-20251001-v1:0": {
"cache_creation_input_token_cost": 0.000001375,
+ "cache_creation_input_token_cost_above_1hr": 0.0000022,
"cache_read_input_token_cost": 1.1e-7,
"input_cost_per_token": 0.0000011,
"deprecation_date": "2026-10-15",
@@ -10810,6 +11298,7 @@
},
"eu.anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 0.000006875,
+ "cache_creation_input_token_cost_above_1hr": 0.000011,
"cache_read_input_token_cost": 5.5e-7,
"input_cost_per_token": 0.0000055,
"litellm_provider": "bedrock_converse",
@@ -10839,6 +11328,7 @@
},
"eu.anthropic.claude-opus-4-7": {
"cache_creation_input_token_cost": 0.000006875,
+ "cache_creation_input_token_cost_above_1hr": 0.000011,
"cache_read_input_token_cost": 5.5e-7,
"input_cost_per_token": 0.0000055,
"litellm_provider": "bedrock_converse",
@@ -10859,6 +11349,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -10883,6 +11374,7 @@
"search_context_size_low": 0.01,
"search_context_size_medium": 0.01
},
+ "supports_adaptive_thinking": true,
"supports_assistant_prefill": false,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -10890,6 +11382,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -10929,11 +11422,13 @@
},
"eu.anthropic.claude-sonnet-4-5-20250929-v1:0": {
"cache_creation_input_token_cost": 0.000004125,
+ "cache_creation_input_token_cost_above_1hr": 0.0000066,
"cache_read_input_token_cost": 3.3e-7,
"input_cost_per_token": 0.0000033,
"input_cost_per_token_above_200k_tokens": 0.0000066,
"output_cost_per_token_above_200k_tokens": 0.00002475,
"cache_creation_input_token_cost_above_200k_tokens": 0.00000825,
+ "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 0.0000132,
"cache_read_input_token_cost_above_200k_tokens": 6.6e-7,
"litellm_provider": "bedrock_converse",
"max_input_tokens": 200000,
@@ -10959,6 +11454,7 @@
},
"eu.anthropic.claude-sonnet-4-6": {
"cache_creation_input_token_cost": 0.000004125,
+ "cache_creation_input_token_cost_above_1hr": 0.0000066,
"cache_read_input_token_cost": 3.3e-7,
"input_cost_per_token": 0.0000033,
"litellm_provider": "bedrock_converse",
@@ -11524,6 +12020,38 @@
"supports_response_schema": true,
"supports_tool_choice": true
},
+ "fireworks_ai/accounts/fireworks/models/deepseek-v4-flash": {
+ "cache_read_input_token_cost": 2.8e-8,
+ "input_cost_per_token": 1.4e-7,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 384000,
+ "max_tokens": 384000,
+ "mode": "chat",
+ "output_cost_per_token": 2.8e-7,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
+ "fireworks_ai/accounts/fireworks/models/deepseek-v4-pro": {
+ "cache_read_input_token_cost": 1.45e-7,
+ "input_cost_per_token": 0.00000174,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 384000,
+ "max_tokens": 384000,
+ "mode": "chat",
+ "output_cost_per_token": 0.00000348,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
"fireworks_ai/accounts/fireworks/models/devstral-small-2505": {
"max_tokens": 131072,
"max_input_tokens": 131072,
@@ -11780,43 +12308,64 @@
"input_cost_per_token": 0.0000014,
"litellm_provider": "fireworks_ai",
"max_input_tokens": 202800,
- "max_output_tokens": 202800,
- "max_tokens": 202800,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
"mode": "chat",
"output_cost_per_token": 0.0000044,
- "source": "https://fireworks.ai/models/fireworks/glm-5p1",
- "supports_function_calling": false,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
"supports_reasoning": true,
- "supports_response_schema": false,
- "supports_tool_choice": false
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
+ "fireworks_ai/accounts/fireworks/models/glm-5p2": {
+ "cache_read_input_token_cost": 2.6e-7,
+ "input_cost_per_token": 0.0000014,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000044,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
},
"fireworks_ai/accounts/fireworks/models/gpt-oss-120b": {
+ "cache_read_input_token_cost": 1.5e-8,
"input_cost_per_token": 1.5e-7,
"litellm_provider": "fireworks_ai",
"max_input_tokens": 131072,
- "max_output_tokens": 131072,
- "max_tokens": 131072,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768,
"mode": "chat",
"output_cost_per_token": 6e-7,
- "source": "https://fireworks.ai/pricing",
+ "source": "https://docs.fireworks.ai/serverless/pricing",
"supports_function_calling": true,
"supports_reasoning": true,
"supports_response_schema": true,
- "supports_tool_choice": true
+ "supports_tool_choice": true,
+ "supports_vision": false
},
"fireworks_ai/accounts/fireworks/models/gpt-oss-20b": {
- "input_cost_per_token": 5e-8,
+ "cache_read_input_token_cost": 3.5e-8,
+ "input_cost_per_token": 7e-8,
"litellm_provider": "fireworks_ai",
"max_input_tokens": 131072,
- "max_output_tokens": 131072,
- "max_tokens": 131072,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768,
"mode": "chat",
- "output_cost_per_token": 2e-7,
- "source": "https://fireworks.ai/pricing",
+ "output_cost_per_token": 3e-7,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
"supports_function_calling": true,
"supports_reasoning": true,
"supports_response_schema": true,
- "supports_tool_choice": true
+ "supports_tool_choice": true,
+ "supports_vision": false
},
"fireworks_ai/accounts/fireworks/models/gpt-oss-safeguard-120b": {
"max_tokens": 131072,
@@ -11953,6 +12502,38 @@
"supports_response_schema": true,
"supports_tool_choice": true
},
+ "fireworks_ai/accounts/fireworks/models/kimi-k2p6": {
+ "cache_read_input_token_cost": 1.6e-7,
+ "input_cost_per_token": 9.5e-7,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
+ "output_cost_per_token": 0.000004,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "fireworks_ai/accounts/fireworks/models/kimi-k2p7-code": {
+ "cache_read_input_token_cost": 1.9e-7,
+ "input_cost_per_token": 9.5e-7,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
+ "output_cost_per_token": 0.000004,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
"fireworks_ai/accounts/fireworks/models/llama-guard-2-8b": {
"max_tokens": 8192,
"max_input_tokens": 8192,
@@ -12286,6 +12867,38 @@
"supports_response_schema": true,
"supports_tool_choice": true
},
+ "fireworks_ai/accounts/fireworks/models/minimax-m2p7": {
+ "cache_read_input_token_cost": 6e-8,
+ "input_cost_per_token": 3e-7,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 196608,
+ "max_output_tokens": 196608,
+ "max_tokens": 196608,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000012,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
+ "fireworks_ai/accounts/fireworks/models/minimax-m3": {
+ "cache_read_input_token_cost": 6e-8,
+ "input_cost_per_token": 3e-7,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 512000,
+ "max_output_tokens": 512000,
+ "max_tokens": 512000,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000012,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
"fireworks_ai/accounts/fireworks/models/ministral-3-14b-instruct-2512": {
"max_tokens": 256000,
"max_input_tokens": 256000,
@@ -13228,6 +13841,22 @@
"litellm_provider": "fireworks_ai",
"mode": "chat"
},
+ "fireworks_ai/accounts/fireworks/models/qwen3p7-plus": {
+ "cache_read_input_token_cost": 8e-8,
+ "input_cost_per_token": 4e-7,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000016,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
"fireworks_ai/accounts/fireworks/models/qwq-32b": {
"max_tokens": 131072,
"max_input_tokens": 131072,
@@ -13376,6 +14005,86 @@
"litellm_provider": "fireworks_ai",
"mode": "chat"
},
+ "fireworks_ai/accounts/fireworks/routers/glm-5p1-fast": {
+ "cache_read_input_token_cost": 5.2e-7,
+ "input_cost_per_token": 0.0000028,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 202800,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000088,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
+ "fireworks_ai/accounts/fireworks/routers/kimi-k2p6-fast": {
+ "cache_read_input_token_cost": 3e-7,
+ "input_cost_per_token": 0.000002,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
+ "output_cost_per_token": 0.000008,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "fireworks_ai/accounts/fireworks/routers/kimi-k2p7-code-fast": {
+ "cache_read_input_token_cost": 3.8e-7,
+ "input_cost_per_token": 0.0000019,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
+ "output_cost_per_token": 0.000008,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "fireworks_ai/deepseek-v4-flash": {
+ "cache_read_input_token_cost": 2.8e-8,
+ "input_cost_per_token": 1.4e-7,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 384000,
+ "max_tokens": 384000,
+ "mode": "chat",
+ "output_cost_per_token": 2.8e-7,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
+ "fireworks_ai/deepseek-v4-pro": {
+ "cache_read_input_token_cost": 1.45e-7,
+ "input_cost_per_token": 0.00000174,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 384000,
+ "max_tokens": 384000,
+ "mode": "chat",
+ "output_cost_per_token": 0.00000348,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
"fireworks_ai/glm-4p7": {
"cache_read_input_token_cost": 3e-7,
"input_cost_per_token": 6e-7,
@@ -13396,15 +14105,80 @@
"input_cost_per_token": 0.0000014,
"litellm_provider": "fireworks_ai",
"max_input_tokens": 202800,
- "max_output_tokens": 202800,
- "max_tokens": 202800,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
"mode": "chat",
"output_cost_per_token": 0.0000044,
- "source": "https://fireworks.ai/models/fireworks/glm-5p1",
- "supports_function_calling": false,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
"supports_reasoning": true,
- "supports_response_schema": false,
- "supports_tool_choice": false
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
+ "fireworks_ai/glm-5p1-fast": {
+ "cache_read_input_token_cost": 5.2e-7,
+ "input_cost_per_token": 0.0000028,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 202800,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000088,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
+ "fireworks_ai/glm-5p2": {
+ "cache_read_input_token_cost": 2.6e-7,
+ "input_cost_per_token": 0.0000014,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000044,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
+ "fireworks_ai/gpt-oss-120b": {
+ "cache_read_input_token_cost": 1.5e-8,
+ "input_cost_per_token": 1.5e-7,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768,
+ "mode": "chat",
+ "output_cost_per_token": 6e-7,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
+ "fireworks_ai/gpt-oss-20b": {
+ "cache_read_input_token_cost": 3.5e-8,
+ "input_cost_per_token": 7e-8,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768,
+ "mode": "chat",
+ "output_cost_per_token": 3e-7,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
},
"fireworks_ai/kimi-k2p5": {
"cache_read_input_token_cost": 1e-7,
@@ -13420,6 +14194,70 @@
"supports_response_schema": true,
"supports_tool_choice": true
},
+ "fireworks_ai/kimi-k2p6": {
+ "cache_read_input_token_cost": 1.6e-7,
+ "input_cost_per_token": 9.5e-7,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
+ "output_cost_per_token": 0.000004,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "fireworks_ai/kimi-k2p6-fast": {
+ "cache_read_input_token_cost": 3e-7,
+ "input_cost_per_token": 0.000002,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
+ "output_cost_per_token": 0.000008,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "fireworks_ai/kimi-k2p7-code": {
+ "cache_read_input_token_cost": 1.9e-7,
+ "input_cost_per_token": 9.5e-7,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
+ "output_cost_per_token": 0.000004,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "fireworks_ai/kimi-k2p7-code-fast": {
+ "cache_read_input_token_cost": 3.8e-7,
+ "input_cost_per_token": 0.0000019,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
+ "output_cost_per_token": 0.000008,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
"fireworks_ai/minimax-m2p1": {
"cache_read_input_token_cost": 3e-8,
"input_cost_per_token": 3e-7,
@@ -13434,6 +14272,54 @@
"supports_response_schema": true,
"supports_tool_choice": true
},
+ "fireworks_ai/minimax-m2p7": {
+ "cache_read_input_token_cost": 6e-8,
+ "input_cost_per_token": 3e-7,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 196608,
+ "max_output_tokens": 196608,
+ "max_tokens": 196608,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000012,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": false
+ },
+ "fireworks_ai/minimax-m3": {
+ "cache_read_input_token_cost": 6e-8,
+ "input_cost_per_token": 3e-7,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 512000,
+ "max_output_tokens": 512000,
+ "max_tokens": 512000,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000012,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "fireworks_ai/qwen3p7-plus": {
+ "cache_read_input_token_cost": 8e-8,
+ "input_cost_per_token": 4e-7,
+ "litellm_provider": "fireworks_ai",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000016,
+ "source": "https://docs.fireworks.ai/serverless/pricing",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
"friendliai/meta-llama-3.1-70b-instruct": {
"input_cost_per_token": 6e-7,
"litellm_provider": "friendliai",
@@ -13893,7 +14779,8 @@
"search_context_size_medium": 0.035,
"search_context_size_high": 0.035
},
- "supports_service_tier": true
+ "supports_service_tier": true,
+ "supports_image_size": false
},
"gemini-2.5-flash-image": {
"cache_read_input_token_cost": 3e-8,
@@ -13943,7 +14830,8 @@
"supports_vision": true,
"supports_web_search": false,
"tpm": 8000000,
- "supports_service_tier": true
+ "supports_service_tier": true,
+ "supports_image_size": false
},
"gemini-2.5-flash-lite": {
"cache_read_input_token_cost": 1e-8,
@@ -13994,7 +14882,8 @@
"search_context_size_medium": 0.035,
"search_context_size_high": 0.035
},
- "supports_service_tier": true
+ "supports_service_tier": true,
+ "supports_image_size": false
},
"gemini-2.5-flash-lite-preview-06-17": {
"deprecation_date": "2025-11-18",
@@ -14045,7 +14934,8 @@
"search_context_size_low": 0.035,
"search_context_size_medium": 0.035,
"search_context_size_high": 0.035
- }
+ },
+ "supports_image_size": false
},
"gemini-2.5-flash-lite-preview-09-2025": {
"cache_read_input_token_cost": 1e-8,
@@ -14095,7 +14985,8 @@
"search_context_size_low": 0.035,
"search_context_size_medium": 0.035,
"search_context_size_high": 0.035
- }
+ },
+ "supports_image_size": false
},
"gemini-2.5-flash-native-audio-latest": {
"input_cost_per_audio_token": 0.000001,
@@ -14217,7 +15108,8 @@
"search_context_size_low": 0.035,
"search_context_size_medium": 0.035,
"search_context_size_high": 0.035
- }
+ },
+ "supports_image_size": false
},
"gemini-2.5-pro": {
"cache_read_input_token_cost": 1.25e-7,
@@ -15387,7 +16279,8 @@
"search_context_size_medium": 0.035,
"search_context_size_high": 0.035
},
- "supports_service_tier": true
+ "supports_service_tier": true,
+ "supports_image_size": false
},
"gemini/gemini-2.5-flash-image": {
"cache_read_input_token_cost": 3e-8,
@@ -15443,7 +16336,8 @@
"search_context_size_medium": 0.035,
"search_context_size_high": 0.035
},
- "supports_service_tier": true
+ "supports_service_tier": true,
+ "supports_image_size": false
},
"gemini/gemini-2.5-flash-lite": {
"cache_read_input_token_cost": 1e-8,
@@ -15496,7 +16390,8 @@
"search_context_size_medium": 0.035,
"search_context_size_high": 0.035
},
- "supports_service_tier": true
+ "supports_service_tier": true,
+ "supports_image_size": false
},
"gemini/gemini-2.5-flash-lite-preview-06-17": {
"deprecation_date": "2025-11-18",
@@ -15549,7 +16444,8 @@
"search_context_size_low": 0.035,
"search_context_size_medium": 0.035,
"search_context_size_high": 0.035
- }
+ },
+ "supports_image_size": false
},
"gemini/gemini-2.5-flash-lite-preview-09-2025": {
"cache_read_input_token_cost": 1e-8,
@@ -15601,7 +16497,8 @@
"search_context_size_low": 0.035,
"search_context_size_medium": 0.035,
"search_context_size_high": 0.035
- }
+ },
+ "supports_image_size": false
},
"gemini/gemini-2.5-flash-native-audio-latest": {
"input_cost_per_audio_token": 0.000001,
@@ -15731,7 +16628,8 @@
"search_context_size_low": 0.035,
"search_context_size_medium": 0.035,
"search_context_size_high": 0.035
- }
+ },
+ "supports_image_size": false
},
"gemini/gemini-2.5-pro": {
"cache_read_input_token_cost": 1.25e-7,
@@ -17088,6 +17986,38 @@
"supports_response_schema": true,
"supports_vision": true
},
+ "github_copilot/mai-code-1-flash": {
+ "cache_read_input_token_cost": 7.5e-8,
+ "input_cost_per_token": 7.5e-7,
+ "litellm_provider": "github_copilot",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000045,
+ "supported_endpoints": [
+ "/v1/chat/completions"
+ ],
+ "supports_function_calling": true,
+ "supports_parallel_function_calling": true,
+ "supports_response_schema": true
+ },
+ "github_copilot/mai-code-1-flash-internal": {
+ "cache_read_input_token_cost": 7.5e-8,
+ "input_cost_per_token": 7.5e-7,
+ "litellm_provider": "github_copilot",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000045,
+ "supported_endpoints": [
+ "/v1/chat/completions"
+ ],
+ "supports_function_calling": true,
+ "supports_parallel_function_calling": true,
+ "supports_response_schema": true
+ },
"glm-4-7-251222": {
"input_cost_per_token": 0,
"litellm_provider": "volcengine",
@@ -17119,6 +18049,39 @@
"supports_video_input": true,
"supports_vision": true
},
+ "global.anthropic.claude-fable-5": {
+ "cache_creation_input_token_cost": 0.0000125,
+ "cache_creation_input_token_cost_above_1hr": 0.00002,
+ "cache_read_input_token_cost": 0.000001,
+ "input_cost_per_token": 0.00001,
+ "litellm_provider": "bedrock_converse",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.00005,
+ "search_context_cost_per_query": {
+ "search_context_size_high": 0.01,
+ "search_context_size_low": 0.01,
+ "search_context_size_medium": 0.01
+ },
+ "supports_adaptive_thinking": true,
+ "supports_assistant_prefill": false,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_sampling_params": false,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "supports_xhigh_reasoning_effort": true,
+ "supports_native_structured_output": true,
+ "supports_max_reasoning_effort": true,
+ "supports_output_config": true,
+ "bedrock_output_config_effort_ceiling": "xhigh"
+ },
"global.anthropic.claude-haiku-4-5-20251001-v1:0": {
"cache_creation_input_token_cost": 0.00000125,
"cache_creation_input_token_cost_above_1hr": 0.000002,
@@ -17224,6 +18187,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -17248,6 +18212,7 @@
"search_context_size_low": 0.01,
"search_context_size_medium": 0.01
},
+ "supports_adaptive_thinking": true,
"supports_assistant_prefill": false,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -17255,6 +18220,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -20412,6 +21378,21 @@
"supports_system_messages": true,
"supports_tool_choice": true
},
+ "inception/mercury-2": {
+ "cache_read_input_token_cost": 2.5e-8,
+ "input_cost_per_token": 2.5e-7,
+ "litellm_provider": "inception",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 50000,
+ "max_tokens": 50000,
+ "mode": "chat",
+ "output_cost_per_token": 7.5e-7,
+ "supports_function_calling": true,
+ "supports_prompt_caching": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true
+ },
"jamba-1.5": {
"input_cost_per_token": 2e-7,
"litellm_provider": "ai21",
@@ -20504,6 +21485,7 @@
},
"jp.anthropic.claude-haiku-4-5-20251001-v1:0": {
"cache_creation_input_token_cost": 0.000001375,
+ "cache_creation_input_token_cost_above_1hr": 0.0000022,
"cache_read_input_token_cost": 1.1e-7,
"input_cost_per_token": 0.0000011,
"litellm_provider": "bedrock_converse",
@@ -20524,13 +21506,46 @@
"supports_vision": true,
"supports_native_structured_output": true
},
+ "jp.anthropic.claude-opus-4-7": {
+ "cache_creation_input_token_cost": 0.000006875,
+ "cache_read_input_token_cost": 5.5e-7,
+ "input_cost_per_token": 0.0000055,
+ "litellm_provider": "bedrock_converse",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000275,
+ "search_context_cost_per_query": {
+ "search_context_size_high": 0.01,
+ "search_context_size_low": 0.01,
+ "search_context_size_medium": 0.01
+ },
+ "supports_assistant_prefill": false,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_sampling_params": false,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "supports_xhigh_reasoning_effort": true,
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true,
+ "supports_max_reasoning_effort": true,
+ "supports_minimal_reasoning_effort": true
+ },
"jp.anthropic.claude-sonnet-4-5-20250929-v1:0": {
"cache_creation_input_token_cost": 0.000004125,
+ "cache_creation_input_token_cost_above_1hr": 0.0000066,
"cache_read_input_token_cost": 3.3e-7,
"input_cost_per_token": 0.0000033,
"input_cost_per_token_above_200k_tokens": 0.0000066,
"output_cost_per_token_above_200k_tokens": 0.00002475,
"cache_creation_input_token_cost_above_200k_tokens": 0.00000825,
+ "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 0.0000132,
"cache_read_input_token_cost_above_200k_tokens": 6.6e-7,
"litellm_provider": "bedrock_converse",
"max_input_tokens": 200000,
@@ -20556,6 +21571,7 @@
},
"jp.anthropic.claude-sonnet-4-6": {
"cache_creation_input_token_cost": 0.000004125,
+ "cache_creation_input_token_cost_above_1hr": 0.0000066,
"cache_read_input_token_cost": 3.3e-7,
"input_cost_per_token": 0.0000033,
"litellm_provider": "bedrock_converse",
@@ -20921,6 +21937,165 @@
"supports_response_schema": true,
"supports_tool_choice": true
},
+ "libertai/deepseek-v4-flash": {
+ "max_tokens": 200000,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 200000,
+ "input_cost_per_token": 2.5e-7,
+ "output_cost_per_token": 0.00000175,
+ "litellm_provider": "libertai",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_system_messages": true,
+ "supports_vision": false,
+ "source": "https://docs.libertai.io/apis/text/"
+ },
+ "libertai/deepseek-v4-flash-thinking": {
+ "max_tokens": 200000,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 200000,
+ "input_cost_per_token": 2.5e-7,
+ "output_cost_per_token": 0.00000175,
+ "litellm_provider": "libertai",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_system_messages": true,
+ "supports_vision": false,
+ "supports_reasoning": true,
+ "source": "https://docs.libertai.io/apis/text/"
+ },
+ "libertai/gemma-4-31b-it": {
+ "max_tokens": 262144,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 4e-7,
+ "litellm_provider": "libertai",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_system_messages": true,
+ "supports_vision": true,
+ "source": "https://docs.libertai.io/apis/text/"
+ },
+ "libertai/gemma-4-31b-it-thinking": {
+ "max_tokens": 262144,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 4e-7,
+ "litellm_provider": "libertai",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_system_messages": true,
+ "supports_vision": true,
+ "supports_reasoning": true,
+ "source": "https://docs.libertai.io/apis/text/"
+ },
+ "libertai/hermes-3-8b-tee": {
+ "max_tokens": 16000,
+ "max_input_tokens": 16000,
+ "max_output_tokens": 16000,
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "litellm_provider": "libertai",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_system_messages": true,
+ "supports_vision": false,
+ "source": "https://docs.libertai.io/apis/text/"
+ },
+ "libertai/qwen3.5-122b-a10b": {
+ "max_tokens": 262144,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "input_cost_per_token": 2.5e-7,
+ "output_cost_per_token": 0.00000175,
+ "litellm_provider": "libertai",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_system_messages": true,
+ "supports_vision": true,
+ "source": "https://docs.libertai.io/apis/text/"
+ },
+ "libertai/qwen3.5-122b-a10b-thinking": {
+ "max_tokens": 262144,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "input_cost_per_token": 2.5e-7,
+ "output_cost_per_token": 0.00000175,
+ "litellm_provider": "libertai",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_system_messages": true,
+ "supports_vision": true,
+ "supports_reasoning": true,
+ "source": "https://docs.libertai.io/apis/text/"
+ },
+ "libertai/qwen3.6-27b": {
+ "max_tokens": 262144,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 5e-7,
+ "litellm_provider": "libertai",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_system_messages": true,
+ "supports_vision": true,
+ "source": "https://docs.libertai.io/apis/text/"
+ },
+ "libertai/qwen3.6-27b-thinking": {
+ "max_tokens": 262144,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 5e-7,
+ "litellm_provider": "libertai",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_system_messages": true,
+ "supports_vision": true,
+ "supports_reasoning": true,
+ "source": "https://docs.libertai.io/apis/text/"
+ },
+ "libertai/qwen3.6-35b-a3b": {
+ "max_tokens": 262144,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 5e-7,
+ "litellm_provider": "libertai",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_system_messages": true,
+ "supports_vision": true,
+ "source": "https://docs.libertai.io/apis/text/"
+ },
+ "libertai/qwen3.6-35b-a3b-thinking": {
+ "max_tokens": 262144,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 5e-7,
+ "litellm_provider": "libertai",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_system_messages": true,
+ "supports_vision": true,
+ "supports_reasoning": true,
+ "source": "https://docs.libertai.io/apis/text/"
+ },
"llamagate/codellama-7b": {
"max_tokens": 4096,
"max_input_tokens": 16384,
@@ -21448,6 +22623,24 @@
"max_input_tokens": 1000000,
"max_output_tokens": 8192
},
+ "minimax/MiniMax-M3": {
+ "input_cost_per_token": 3e-7,
+ "input_cost_per_token_above_512k_tokens": 6e-7,
+ "output_cost_per_token": 0.0000012,
+ "output_cost_per_token_above_512k_tokens": 0.0000024,
+ "cache_read_input_token_cost": 6e-8,
+ "cache_read_input_token_cost_above_512k_tokens": 1.2e-7,
+ "litellm_provider": "minimax",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_vision": true,
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 128000
+ },
"mistral.devstral-2-123b": {
"input_cost_per_token": 4e-7,
"litellm_provider": "bedrock_converse",
@@ -21923,6 +23116,21 @@
"supports_tool_choice": true,
"supports_vision": true
},
+ "mistral/ministral-8b-latest": {
+ "input_cost_per_token": 1.5e-7,
+ "litellm_provider": "mistral",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
+ "output_cost_per_token": 1.5e-7,
+ "source": "https://mistral.ai/pricing",
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
"mistral/mistral-large-2402": {
"input_cost_per_token": 0.000004,
"litellm_provider": "mistral",
@@ -22059,6 +23267,21 @@
"supports_tool_choice": true,
"supports_vision": true
},
+ "mistral/mistral-medium-3-5": {
+ "input_cost_per_token": 0.0000015,
+ "litellm_provider": "mistral",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000075,
+ "source": "https://docs.mistral.ai/models/model-cards/mistral-medium-3-5-26-04",
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
"mistral/mistral-medium-latest": {
"input_cost_per_token": 4e-7,
"litellm_provider": "mistral",
@@ -22260,6 +23483,7 @@
},
"moonshot/kimi-k2-0711-preview": {
"cache_read_input_token_cost": 1.5e-7,
+ "deprecation_date": "2026-05-25",
"input_cost_per_token": 6e-7,
"litellm_provider": "moonshot",
"max_input_tokens": 131072,
@@ -22274,6 +23498,7 @@
},
"moonshot/kimi-k2-0905-preview": {
"cache_read_input_token_cost": 1.5e-7,
+ "deprecation_date": "2026-05-25",
"input_cost_per_token": 6e-7,
"litellm_provider": "moonshot",
"max_input_tokens": 262144,
@@ -22288,6 +23513,7 @@
},
"moonshot/kimi-k2-thinking": {
"cache_read_input_token_cost": 1.5e-7,
+ "deprecation_date": "2026-05-25",
"input_cost_per_token": 6e-7,
"litellm_provider": "moonshot",
"max_input_tokens": 262144,
@@ -22303,6 +23529,7 @@
},
"moonshot/kimi-k2-thinking-turbo": {
"cache_read_input_token_cost": 1.5e-7,
+ "deprecation_date": "2026-05-25",
"input_cost_per_token": 0.00000115,
"litellm_provider": "moonshot",
"max_input_tokens": 262144,
@@ -22318,6 +23545,7 @@
},
"moonshot/kimi-k2-turbo-preview": {
"cache_read_input_token_cost": 1.5e-7,
+ "deprecation_date": "2026-05-25",
"input_cost_per_token": 0.00000115,
"litellm_provider": "moonshot",
"max_input_tokens": 262144,
@@ -22342,6 +23570,7 @@
"source": "https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart",
"supports_function_calling": true,
"supports_reasoning": true,
+ "supports_response_schema": true,
"supports_tool_choice": true,
"supports_video_input": true,
"supports_vision": true
@@ -22358,12 +23587,14 @@
"source": "https://platform.kimi.ai/docs/pricing/chat-k26",
"supports_function_calling": true,
"supports_reasoning": true,
+ "supports_response_schema": true,
"supports_tool_choice": true,
"supports_video_input": true,
"supports_vision": true
},
"moonshot/kimi-latest": {
"cache_read_input_token_cost": 1.5e-7,
+ "deprecation_date": "2026-01-28",
"input_cost_per_token": 0.000002,
"litellm_provider": "moonshot",
"max_input_tokens": 131072,
@@ -22378,6 +23609,7 @@
},
"moonshot/kimi-latest-128k": {
"cache_read_input_token_cost": 1.5e-7,
+ "deprecation_date": "2026-01-28",
"input_cost_per_token": 0.000002,
"litellm_provider": "moonshot",
"max_input_tokens": 131072,
@@ -22392,6 +23624,7 @@
},
"moonshot/kimi-latest-32k": {
"cache_read_input_token_cost": 1.5e-7,
+ "deprecation_date": "2026-01-28",
"input_cost_per_token": 0.000001,
"litellm_provider": "moonshot",
"max_input_tokens": 32768,
@@ -22406,6 +23639,7 @@
},
"moonshot/kimi-latest-8k": {
"cache_read_input_token_cost": 1.5e-7,
+ "deprecation_date": "2026-01-28",
"input_cost_per_token": 2e-7,
"litellm_provider": "moonshot",
"max_input_tokens": 8192,
@@ -22420,6 +23654,7 @@
},
"moonshot/kimi-thinking-preview": {
"cache_read_input_token_cost": 1.5e-7,
+ "deprecation_date": "2025-11-11",
"input_cost_per_token": 6e-7,
"litellm_provider": "moonshot",
"max_input_tokens": 131072,
@@ -22440,9 +23675,11 @@
"output_cost_per_token": 0.000005,
"source": "https://platform.moonshot.ai/docs/pricing",
"supports_function_calling": true,
+ "supports_response_schema": true,
"supports_tool_choice": true
},
"moonshot/moonshot-v1-128k-0430": {
+ "deprecation_date": "2024-04-30",
"input_cost_per_token": 0.000002,
"litellm_provider": "moonshot",
"max_input_tokens": 131072,
@@ -22464,6 +23701,7 @@
"output_cost_per_token": 0.000005,
"source": "https://platform.moonshot.ai/docs/pricing",
"supports_function_calling": true,
+ "supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true
},
@@ -22477,9 +23715,11 @@
"output_cost_per_token": 0.000003,
"source": "https://platform.moonshot.ai/docs/pricing",
"supports_function_calling": true,
+ "supports_response_schema": true,
"supports_tool_choice": true
},
"moonshot/moonshot-v1-32k-0430": {
+ "deprecation_date": "2024-04-30",
"input_cost_per_token": 0.000001,
"litellm_provider": "moonshot",
"max_input_tokens": 32768,
@@ -22501,6 +23741,7 @@
"output_cost_per_token": 0.000003,
"source": "https://platform.moonshot.ai/docs/pricing",
"supports_function_calling": true,
+ "supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true
},
@@ -22514,9 +23755,11 @@
"output_cost_per_token": 0.000002,
"source": "https://platform.moonshot.ai/docs/pricing",
"supports_function_calling": true,
+ "supports_response_schema": true,
"supports_tool_choice": true
},
"moonshot/moonshot-v1-8k-0430": {
+ "deprecation_date": "2024-04-30",
"input_cost_per_token": 2e-7,
"litellm_provider": "moonshot",
"max_input_tokens": 8192,
@@ -22538,6 +23781,7 @@
"output_cost_per_token": 0.000002,
"source": "https://platform.moonshot.ai/docs/pricing",
"supports_function_calling": true,
+ "supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true
},
@@ -22551,6 +23795,7 @@
"output_cost_per_token": 0.000005,
"source": "https://platform.moonshot.ai/docs/pricing",
"supports_function_calling": true,
+ "supports_response_schema": true,
"supports_tool_choice": true
},
"moonshotai.kimi-k2.5": {
@@ -24619,7 +25864,8 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
- "supports_native_streaming": true
+ "supports_native_streaming": true,
+ "supports_image_size": false
},
"oci/google.gemini-2.5-flash-lite": {
"input_cost_per_token": 7.5e-8,
@@ -24633,7 +25879,8 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
- "supports_native_streaming": true
+ "supports_native_streaming": true,
+ "supports_image_size": false
},
"oci/google.gemini-2.5-pro": {
"input_cost_per_token": 0.00000125,
@@ -25572,7 +26819,8 @@
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "supports_image_size": false
},
"openrouter/google/gemini-2.5-pro": {
"input_cost_per_audio_token": 7e-7,
@@ -27207,6 +28455,84 @@
"mode": "chat",
"output_cost_per_token": 2.8e-7
},
+ "pinstripes/ps/deepseek-v4-flash": {
+ "max_tokens": 163840,
+ "max_input_tokens": 163840,
+ "max_output_tokens": 163840,
+ "input_cost_per_token": 1e-7,
+ "output_cost_per_token": 2e-7,
+ "litellm_provider": "pinstripes",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_assistant_prefill": true,
+ "supports_reasoning": true,
+ "source": "https://pinstripes.io/pricing"
+ },
+ "pinstripes/ps/glm-4.5-air": {
+ "max_tokens": 128000,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 128000,
+ "input_cost_per_token": 1.25e-7,
+ "output_cost_per_token": 4.5e-7,
+ "litellm_provider": "pinstripes",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_assistant_prefill": true,
+ "supports_reasoning": true,
+ "source": "https://pinstripes.io/pricing"
+ },
+ "pinstripes/ps/minimax-m2.7": {
+ "max_tokens": 1000192,
+ "max_input_tokens": 1000192,
+ "max_output_tokens": 1000192,
+ "input_cost_per_token": 2.55e-7,
+ "output_cost_per_token": 5.5e-7,
+ "litellm_provider": "pinstripes",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_assistant_prefill": true,
+ "supports_reasoning": false,
+ "source": "https://pinstripes.io/pricing"
+ },
+ "pinstripes/ps/qwen3-30b-a3b": {
+ "max_tokens": 131072,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "input_cost_per_token": 9e-8,
+ "output_cost_per_token": 2e-7,
+ "litellm_provider": "pinstripes",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_assistant_prefill": true,
+ "supports_reasoning": true,
+ "source": "https://pinstripes.io/pricing"
+ },
+ "pinstripes/ps/qwen3-coder-30b-a3b": {
+ "max_tokens": 131072,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 6e-7,
+ "litellm_provider": "pinstripes",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_assistant_prefill": true,
+ "supports_reasoning": false,
+ "source": "https://pinstripes.io/pricing"
+ },
+ "pinstripes/ps/qwen3.6-35b-a3b": {
+ "max_tokens": 131072,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "input_cost_per_token": 1.4e-7,
+ "output_cost_per_token": 4.5e-7,
+ "litellm_provider": "pinstripes",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_assistant_prefill": true,
+ "supports_reasoning": true,
+ "source": "https://pinstripes.io/pricing"
+ },
"publicai/BSC-LT/ALIA-40b-instruct_Q8_0": {
"input_cost_per_token": 0,
"litellm_provider": "publicai",
@@ -27530,7 +28856,8 @@
"supports_vision": true,
"supports_system_messages": true,
"supports_tool_choice": true,
- "supports_response_schema": true
+ "supports_response_schema": true,
+ "supports_image_size": false
},
"replicate/google/gemini-3-pro": {
"input_cost_per_token": 0.000002,
@@ -28079,21 +29406,278 @@
"output_cost_per_token": 0,
"supports_reasoning": true
},
- "snowflake/claude-3-5-sonnet": {
- "litellm_provider": "snowflake",
- "max_input_tokens": 18000,
+ "scaleway/google/gemma-3-27b-it": {
+ "input_cost_per_token": 2.5e-7,
+ "litellm_provider": "scaleway",
+ "max_input_tokens": 40000,
"max_output_tokens": 8192,
"max_tokens": 8192,
"mode": "chat",
- "supports_computer_use": true
+ "output_cost_per_token": 5e-7,
+ "supports_function_calling": true,
+ "supports_vision": true
+ },
+ "scaleway/google/gemma-4-26b-a4b-it": {
+ "input_cost_per_token": 2.5e-7,
+ "litellm_provider": "scaleway",
+ "max_input_tokens": 256000,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768,
+ "mode": "chat",
+ "output_cost_per_token": 5e-7,
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_vision": true
+ },
+ "scaleway/hcompany/holo2-30b-a3b": {
+ "input_cost_per_token": 3e-7,
+ "litellm_provider": "scaleway",
+ "max_input_tokens": 22000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384,
+ "mode": "chat",
+ "output_cost_per_token": 7e-7,
+ "supports_reasoning": true,
+ "supports_vision": true
+ },
+ "scaleway/meta/llama-3.3-70b-instruct": {
+ "input_cost_per_token": 9e-7,
+ "litellm_provider": "scaleway",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384,
+ "mode": "chat",
+ "output_cost_per_token": 9e-7,
+ "supports_function_calling": true
+ },
+ "scaleway/mistralai/devstral-2-123b-instruct-2512": {
+ "input_cost_per_token": 4e-7,
+ "litellm_provider": "scaleway",
+ "max_input_tokens": 200000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384,
+ "mode": "chat",
+ "output_cost_per_token": 0.000002,
+ "supports_function_calling": true
+ },
+ "scaleway/mistralai/mistral-medium-3.5-128b": {
+ "input_cost_per_token": 0.0000015,
+ "litellm_provider": "scaleway",
+ "max_input_tokens": 256000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000075,
+ "supports_reasoning": true,
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_tool_choice": true
+ },
+ "scaleway/mistralai/mistral-small-3.2-24b-instruct-2506": {
+ "input_cost_per_token": 1.5e-7,
+ "litellm_provider": "scaleway",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768,
+ "mode": "chat",
+ "output_cost_per_token": 3.5e-7,
+ "supports_function_calling": true,
+ "supports_vision": true
+ },
+ "scaleway/mistralai/pixtral-12b-2409": {
+ "input_cost_per_token": 2e-7,
+ "litellm_provider": "scaleway",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096,
+ "mode": "chat",
+ "output_cost_per_token": 2e-7,
+ "supports_vision": true,
+ "supports_function_calling": true
+ },
+ "scaleway/mistralai/voxtral-small-24b-2507": {
+ "input_cost_per_audio_token": 1.5e-7,
+ "input_cost_per_token": 1.5e-7,
+ "litellm_provider": "scaleway",
+ "max_input_tokens": 32000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384,
+ "mode": "chat",
+ "output_cost_per_token": 3.5e-7,
+ "supports_audio_input": true
+ },
+ "scaleway/openai/gpt-oss-120b": {
+ "input_cost_per_token": 1.5e-7,
+ "litellm_provider": "scaleway",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768,
+ "mode": "chat",
+ "output_cost_per_token": 6e-7,
+ "supports_function_calling": true
+ },
+ "scaleway/qwen/qwen3-235b-a22b-instruct-2507": {
+ "input_cost_per_token": 7.5e-7,
+ "litellm_provider": "scaleway",
+ "max_input_tokens": 256000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384,
+ "mode": "chat",
+ "output_cost_per_token": 0.00000225,
+ "supports_function_calling": true
+ },
+ "scaleway/qwen/qwen3-coder-30b-a3b-instruct": {
+ "input_cost_per_token": 2e-7,
+ "litellm_provider": "scaleway",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768,
+ "mode": "chat",
+ "output_cost_per_token": 8e-7,
+ "supports_function_calling": true
+ },
+ "scaleway/qwen/qwen3.5-397b-a17b": {
+ "input_cost_per_token": 6e-7,
+ "litellm_provider": "scaleway",
+ "max_input_tokens": 256000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000036,
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_vision": true
+ },
+ "scaleway/qwen/qwen3.6-35b-a3b": {
+ "input_cost_per_token": 2.5e-7,
+ "litellm_provider": "scaleway",
+ "max_input_tokens": 256000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000015,
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_reasoning": true
+ },
+ "snowflake/claude-3-5-sonnet": {
+ "litellm_provider": "snowflake",
+ "max_input_tokens": 200000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384,
+ "mode": "chat",
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_response_schema": true
+ },
+ "snowflake/claude-3-7-sonnet": {
+ "max_tokens": 16384,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 16384,
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "litellm_provider": "snowflake",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true
+ },
+ "snowflake/claude-4-opus": {
+ "max_tokens": 16384,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 16384,
+ "input_cost_per_token": 0.000005,
+ "output_cost_per_token": 0.000025,
+ "cache_read_input_token_cost": 5e-7,
+ "litellm_provider": "snowflake",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true
+ },
+ "snowflake/claude-4-sonnet": {
+ "max_tokens": 16384,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 16384,
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "litellm_provider": "snowflake",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_response_schema": true
+ },
+ "snowflake/claude-haiku-4-5": {
+ "max_tokens": 16384,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 16384,
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000005,
+ "cache_read_input_token_cost": 1e-7,
+ "litellm_provider": "snowflake",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_response_schema": true
+ },
+ "snowflake/claude-sonnet-4-5": {
+ "max_tokens": 16384,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 16384,
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "litellm_provider": "snowflake",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_response_schema": true
+ },
+ "snowflake/claude-sonnet-4-6": {
+ "max_tokens": 16384,
+ "max_input_tokens": 200000,
+ "max_output_tokens": 16384,
+ "input_cost_per_token": 0.000003,
+ "output_cost_per_token": 0.000015,
+ "cache_read_input_token_cost": 3e-7,
+ "litellm_provider": "snowflake",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_response_schema": true
},
"snowflake/deepseek-r1": {
"litellm_provider": "snowflake",
- "max_input_tokens": 32768,
- "max_output_tokens": 8192,
- "max_tokens": 8192,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "max_tokens": 16384,
"mode": "chat",
- "supports_reasoning": true
+ "input_cost_per_token": 0.00000135,
+ "output_cost_per_token": 0.0000054,
+ "supports_reasoning": true,
+ "supports_system_messages": true
},
"snowflake/gemma-7b": {
"litellm_provider": "snowflake",
@@ -28147,23 +29731,34 @@
"snowflake/llama3.1-405b": {
"litellm_provider": "snowflake",
"max_input_tokens": 128000,
- "max_output_tokens": 8192,
- "max_tokens": 8192,
- "mode": "chat"
+ "max_output_tokens": 16384,
+ "max_tokens": 16384,
+ "mode": "chat",
+ "input_cost_per_token": 0.0000012,
+ "output_cost_per_token": 0.0000012,
+ "supports_function_calling": true,
+ "supports_system_messages": true
},
"snowflake/llama3.1-70b": {
"litellm_provider": "snowflake",
"max_input_tokens": 128000,
- "max_output_tokens": 8192,
- "max_tokens": 8192,
- "mode": "chat"
+ "max_output_tokens": 16384,
+ "max_tokens": 16384,
+ "mode": "chat",
+ "input_cost_per_token": 7.2e-7,
+ "output_cost_per_token": 7.2e-7,
+ "supports_function_calling": true,
+ "supports_system_messages": true
},
"snowflake/llama3.1-8b": {
"litellm_provider": "snowflake",
"max_input_tokens": 128000,
- "max_output_tokens": 8192,
- "max_tokens": 8192,
- "mode": "chat"
+ "max_output_tokens": 16384,
+ "max_tokens": 16384,
+ "mode": "chat",
+ "input_cost_per_token": 2.4e-7,
+ "output_cost_per_token": 2.4e-7,
+ "supports_system_messages": true
},
"snowflake/llama3.2-1b": {
"litellm_provider": "snowflake",
@@ -28180,11 +29775,26 @@
"mode": "chat"
},
"snowflake/llama3.3-70b": {
+ "max_tokens": 16384,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "input_cost_per_token": 7.2e-7,
+ "output_cost_per_token": 7.2e-7,
"litellm_provider": "snowflake",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_system_messages": true
+ },
+ "snowflake/llama4-maverick": {
+ "max_tokens": 16384,
"max_input_tokens": 128000,
- "max_output_tokens": 8192,
- "max_tokens": 8192,
- "mode": "chat"
+ "max_output_tokens": 16384,
+ "input_cost_per_token": 2.4e-7,
+ "output_cost_per_token": 9.7e-7,
+ "litellm_provider": "snowflake",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_system_messages": true
},
"snowflake/mistral-7b": {
"litellm_provider": "snowflake",
@@ -28203,9 +29813,14 @@
"snowflake/mistral-large2": {
"litellm_provider": "snowflake",
"max_input_tokens": 128000,
- "max_output_tokens": 8192,
- "max_tokens": 8192,
- "mode": "chat"
+ "max_output_tokens": 16384,
+ "max_tokens": 16384,
+ "mode": "chat",
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.000006,
+ "supports_function_calling": true,
+ "supports_system_messages": true,
+ "supports_response_schema": true
},
"snowflake/mixtral-8x7b": {
"litellm_provider": "snowflake",
@@ -28214,6 +29829,61 @@
"max_tokens": 8192,
"mode": "chat"
},
+ "snowflake/openai-gpt-4.1": {
+ "max_tokens": 16384,
+ "max_input_tokens": 300000,
+ "max_output_tokens": 16384,
+ "input_cost_per_token": 0.000002,
+ "output_cost_per_token": 0.000008,
+ "cache_read_input_token_cost": 5e-7,
+ "litellm_provider": "snowflake",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_response_schema": true
+ },
+ "snowflake/openai-gpt-5": {
+ "max_tokens": 16384,
+ "max_input_tokens": 300000,
+ "max_output_tokens": 16384,
+ "input_cost_per_token": 0.00000125,
+ "output_cost_per_token": 0.00001,
+ "cache_read_input_token_cost": 1.25e-7,
+ "litellm_provider": "snowflake",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true
+ },
+ "snowflake/openai-gpt-5-mini": {
+ "max_tokens": 16384,
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 16384,
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 0.0000012,
+ "litellm_provider": "snowflake",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_system_messages": true,
+ "supports_response_schema": true
+ },
+ "snowflake/openai-gpt-5-nano": {
+ "max_tokens": 16384,
+ "max_input_tokens": 5000000,
+ "max_output_tokens": 16384,
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "litellm_provider": "snowflake",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_system_messages": true,
+ "supports_response_schema": true
+ },
"snowflake/reka-core": {
"litellm_provider": "snowflake",
"max_input_tokens": 32000,
@@ -28243,11 +29913,174 @@
"mode": "chat"
},
"snowflake/snowflake-llama-3.3-70b": {
+ "max_tokens": 16384,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "input_cost_per_token": 7.2e-7,
+ "output_cost_per_token": 7.2e-7,
"litellm_provider": "snowflake",
- "max_input_tokens": 8000,
- "max_output_tokens": 8192,
- "max_tokens": 8192,
- "mode": "chat"
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_system_messages": true
+ },
+ "tensormesh/MiniMaxAI/MiniMax-M2.5": {
+ "litellm_provider": "tensormesh",
+ "mode": "chat",
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 0.0000012,
+ "cache_read_input_token_cost": 0,
+ "max_input_tokens": 196608,
+ "max_output_tokens": 196608,
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_reasoning": true,
+ "source": "https://serverless.tensormesh.ai/v1/models/openrouter"
+ },
+ "tensormesh/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": {
+ "litellm_provider": "tensormesh",
+ "mode": "chat",
+ "input_cost_per_token": 4.5e-7,
+ "output_cost_per_token": 0.0000018,
+ "cache_read_input_token_cost": 0,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "source": "https://serverless.tensormesh.ai/v1/models/openrouter"
+ },
+ "tensormesh/Qwen/Qwen3.5-397B-A17B-FP8": {
+ "litellm_provider": "tensormesh",
+ "mode": "chat",
+ "input_cost_per_token": 6e-7,
+ "output_cost_per_token": 0.0000036,
+ "cache_read_input_token_cost": 0,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_reasoning": true,
+ "source": "https://serverless.tensormesh.ai/v1/models/openrouter"
+ },
+ "tensormesh/Qwen/Qwen3.6-27B-FP8": {
+ "litellm_provider": "tensormesh",
+ "mode": "chat",
+ "input_cost_per_token": 3.2e-7,
+ "output_cost_per_token": 0.0000032,
+ "cache_read_input_token_cost": 0,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_reasoning": true,
+ "source": "https://serverless.tensormesh.ai/v1/models/openrouter"
+ },
+ "tensormesh/deepseek-ai/DeepSeek-V4-Flash": {
+ "litellm_provider": "tensormesh",
+ "mode": "chat",
+ "input_cost_per_token": 1.4e-7,
+ "output_cost_per_token": 2.8e-7,
+ "cache_read_input_token_cost": 0,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 32768,
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_reasoning": true,
+ "source": "https://serverless.tensormesh.ai/v1/models/openrouter"
+ },
+ "tensormesh/google/gemma-4-31B-it": {
+ "litellm_provider": "tensormesh",
+ "mode": "chat",
+ "input_cost_per_token": 1.4e-7,
+ "output_cost_per_token": 5.6e-7,
+ "cache_read_input_token_cost": 0,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 32768,
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_reasoning": true,
+ "source": "https://serverless.tensormesh.ai/v1/models/openrouter"
+ },
+ "tensormesh/lukealonso/GLM-5.1-NVFP4-MTP": {
+ "litellm_provider": "tensormesh",
+ "mode": "chat",
+ "input_cost_per_token": 0.0000014,
+ "output_cost_per_token": 0.0000044,
+ "cache_read_input_token_cost": 0,
+ "max_input_tokens": 202752,
+ "max_output_tokens": 202752,
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_reasoning": true,
+ "source": "https://serverless.tensormesh.ai/v1/models/openrouter"
+ },
+ "tensormesh/moonshotai/Kimi-K2.6": {
+ "litellm_provider": "tensormesh",
+ "mode": "chat",
+ "input_cost_per_token": 9.6e-7,
+ "output_cost_per_token": 0.000004,
+ "cache_read_input_token_cost": 0,
+ "max_input_tokens": 32768,
+ "max_output_tokens": 32768,
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_reasoning": true,
+ "source": "https://serverless.tensormesh.ai/v1/models/openrouter"
+ },
+ "tensormesh/openai/gpt-oss-120b": {
+ "litellm_provider": "tensormesh",
+ "mode": "chat",
+ "input_cost_per_token": 1.5e-7,
+ "output_cost_per_token": 6e-7,
+ "cache_read_input_token_cost": 0,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_reasoning": true,
+ "source": "https://serverless.tensormesh.ai/v1/models/openrouter"
+ },
+ "tensormesh/openai/gpt-oss-20b": {
+ "litellm_provider": "tensormesh",
+ "mode": "chat",
+ "input_cost_per_token": 7e-8,
+ "output_cost_per_token": 2.8e-7,
+ "cache_read_input_token_cost": 0,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_response_schema": true,
+ "supports_prompt_caching": true,
+ "supports_system_messages": true,
+ "supports_reasoning": true,
+ "source": "https://serverless.tensormesh.ai/v1/models/openrouter"
},
"together-ai-21.1b-41b": {
"input_cost_per_token": 8e-7,
@@ -28658,19 +30491,21 @@
"supports_video_input": true
},
"us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0": {
- "cache_creation_input_token_cost": 0.000004125,
- "cache_read_input_token_cost": 3.3e-7,
- "input_cost_per_token": 0.0000033,
- "input_cost_per_token_above_200k_tokens": 0.0000066,
- "output_cost_per_token_above_200k_tokens": 0.00002475,
- "cache_creation_input_token_cost_above_200k_tokens": 0.00000825,
- "cache_read_input_token_cost_above_200k_tokens": 6.6e-7,
+ "cache_creation_input_token_cost": 0.0000045,
+ "cache_creation_input_token_cost_above_1hr": 0.0000072,
+ "cache_read_input_token_cost": 3.6e-7,
+ "input_cost_per_token": 0.0000036,
+ "input_cost_per_token_above_200k_tokens": 0.0000072,
+ "output_cost_per_token_above_200k_tokens": 0.000027,
+ "cache_creation_input_token_cost_above_200k_tokens": 0.000009,
+ "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 0.0000144,
+ "cache_read_input_token_cost_above_200k_tokens": 7.2e-7,
"litellm_provider": "bedrock_converse",
"max_input_tokens": 200000,
"max_output_tokens": 64000,
"max_tokens": 64000,
"mode": "chat",
- "output_cost_per_token": 0.0000165,
+ "output_cost_per_token": 0.000018,
"supports_assistant_prefill": true,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -28891,6 +30726,39 @@
"cache_read_input_token_cost": 3e-7,
"cache_creation_input_token_cost": 0.00000375
},
+ "us.anthropic.claude-fable-5": {
+ "cache_creation_input_token_cost": 0.00001375,
+ "cache_creation_input_token_cost_above_1hr": 0.000022,
+ "cache_read_input_token_cost": 0.0000011,
+ "input_cost_per_token": 0.000011,
+ "litellm_provider": "bedrock_converse",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.000055,
+ "search_context_cost_per_query": {
+ "search_context_size_high": 0.01,
+ "search_context_size_low": 0.01,
+ "search_context_size_medium": 0.01
+ },
+ "supports_adaptive_thinking": true,
+ "supports_assistant_prefill": false,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_sampling_params": false,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "supports_xhigh_reasoning_effort": true,
+ "supports_native_structured_output": true,
+ "supports_max_reasoning_effort": true,
+ "supports_output_config": true,
+ "bedrock_output_config_effort_ceiling": "xhigh"
+ },
"us.anthropic.claude-haiku-4-5-20251001-v1:0": {
"cache_creation_input_token_cost": 0.000001375,
"cache_creation_input_token_cost_above_1hr": 0.0000022,
@@ -29046,6 +30914,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -29070,6 +30939,7 @@
"search_context_size_low": 0.01,
"search_context_size_medium": 0.01
},
+ "supports_adaptive_thinking": true,
"supports_assistant_prefill": false,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -29077,6 +30947,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -29912,7 +31783,8 @@
"supports_vision": true,
"supports_function_calling": true,
"supports_tool_choice": true,
- "supports_response_schema": true
+ "supports_response_schema": true,
+ "supports_image_size": false
},
"vercel_ai_gateway/google/gemini-2.5-pro": {
"input_cost_per_token": 0.0000025,
@@ -30645,6 +32517,7 @@
},
"vertex_ai/claude-3-7-sonnet@20250219": {
"cache_creation_input_token_cost": 0.00000375,
+ "cache_creation_input_token_cost_above_1hr": 0.000006,
"cache_read_input_token_cost": 3e-7,
"deprecation_date": "2026-05-11",
"input_cost_per_token": 0.000003,
@@ -30742,8 +32615,69 @@
"supports_tool_choice": true,
"supports_vision": true
},
+ "vertex_ai/claude-fable-5": {
+ "cache_creation_input_token_cost": 0.0000125,
+ "cache_creation_input_token_cost_above_1hr": 0.00002,
+ "cache_read_input_token_cost": 0.000001,
+ "input_cost_per_token": 0.00001,
+ "litellm_provider": "vertex_ai-anthropic_models",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.00005,
+ "search_context_cost_per_query": {
+ "search_context_size_high": 0.01,
+ "search_context_size_low": 0.01,
+ "search_context_size_medium": 0.01
+ },
+ "supports_adaptive_thinking": true,
+ "supports_assistant_prefill": false,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_sampling_params": false,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "supports_xhigh_reasoning_effort": true,
+ "supports_max_reasoning_effort": true
+ },
+ "vertex_ai/claude-fable-5@default": {
+ "cache_creation_input_token_cost": 0.0000125,
+ "cache_creation_input_token_cost_above_1hr": 0.00002,
+ "cache_read_input_token_cost": 0.000001,
+ "input_cost_per_token": 0.00001,
+ "litellm_provider": "vertex_ai-anthropic_models",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.00005,
+ "search_context_cost_per_query": {
+ "search_context_size_high": 0.01,
+ "search_context_size_low": 0.01,
+ "search_context_size_medium": 0.01
+ },
+ "supports_adaptive_thinking": true,
+ "supports_assistant_prefill": false,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_sampling_params": false,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "supports_xhigh_reasoning_effort": true,
+ "supports_max_reasoning_effort": true
+ },
"vertex_ai/claude-haiku-4-5": {
"cache_creation_input_token_cost": 0.00000125,
+ "cache_creation_input_token_cost_above_1hr": 0.000002,
"cache_read_input_token_cost": 1e-7,
"input_cost_per_token": 0.000001,
"litellm_provider": "vertex_ai-anthropic_models",
@@ -30765,6 +32699,7 @@
},
"vertex_ai/claude-haiku-4-5@20251001": {
"cache_creation_input_token_cost": 0.00000125,
+ "cache_creation_input_token_cost_above_1hr": 0.000002,
"cache_read_input_token_cost": 1e-7,
"input_cost_per_token": 0.000001,
"litellm_provider": "vertex_ai-anthropic_models",
@@ -30786,6 +32721,7 @@
},
"vertex_ai/claude-opus-4": {
"cache_creation_input_token_cost": 0.00001875,
+ "cache_creation_input_token_cost_above_1hr": 0.00003,
"cache_read_input_token_cost": 0.0000015,
"input_cost_per_token": 0.000015,
"litellm_provider": "vertex_ai-anthropic_models",
@@ -30811,6 +32747,7 @@
},
"vertex_ai/claude-opus-4-1": {
"cache_creation_input_token_cost": 0.00001875,
+ "cache_creation_input_token_cost_above_1hr": 0.00003,
"cache_read_input_token_cost": 0.0000015,
"input_cost_per_token": 0.000015,
"input_cost_per_token_batches": 0.0000075,
@@ -30828,6 +32765,7 @@
},
"vertex_ai/claude-opus-4-1@20250805": {
"cache_creation_input_token_cost": 0.00001875,
+ "cache_creation_input_token_cost_above_1hr": 0.00003,
"cache_read_input_token_cost": 0.0000015,
"input_cost_per_token": 0.000015,
"input_cost_per_token_batches": 0.0000075,
@@ -30845,6 +32783,7 @@
},
"vertex_ai/claude-opus-4-5": {
"cache_creation_input_token_cost": 0.00000625,
+ "cache_creation_input_token_cost_above_1hr": 0.00001,
"cache_read_input_token_cost": 5e-7,
"input_cost_per_token": 0.000005,
"litellm_provider": "vertex_ai-anthropic_models",
@@ -30871,6 +32810,7 @@
},
"vertex_ai/claude-opus-4-5@20251101": {
"cache_creation_input_token_cost": 0.00000625,
+ "cache_creation_input_token_cost_above_1hr": 0.00001,
"cache_read_input_token_cost": 5e-7,
"input_cost_per_token": 0.000005,
"litellm_provider": "vertex_ai-anthropic_models",
@@ -30898,6 +32838,7 @@
},
"vertex_ai/claude-opus-4-6": {
"cache_creation_input_token_cost": 0.00000625,
+ "cache_creation_input_token_cost_above_1hr": 0.00001,
"cache_read_input_token_cost": 5e-7,
"input_cost_per_token": 0.000005,
"litellm_provider": "vertex_ai-anthropic_models",
@@ -30925,6 +32866,7 @@
},
"vertex_ai/claude-opus-4-6@default": {
"cache_creation_input_token_cost": 0.00000625,
+ "cache_creation_input_token_cost_above_1hr": 0.00001,
"cache_read_input_token_cost": 5e-7,
"input_cost_per_token": 0.000005,
"litellm_provider": "vertex_ai-anthropic_models",
@@ -30952,6 +32894,7 @@
},
"vertex_ai/claude-opus-4-7": {
"cache_creation_input_token_cost": 0.00000625,
+ "cache_creation_input_token_cost_above_1hr": 0.00001,
"cache_read_input_token_cost": 5e-7,
"input_cost_per_token": 0.000005,
"litellm_provider": "vertex_ai-anthropic_models",
@@ -30972,6 +32915,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -30979,6 +32923,7 @@
},
"vertex_ai/claude-opus-4-7@default": {
"cache_creation_input_token_cost": 0.00000625,
+ "cache_creation_input_token_cost_above_1hr": 0.00001,
"cache_read_input_token_cost": 5e-7,
"input_cost_per_token": 0.000005,
"litellm_provider": "vertex_ai-anthropic_models",
@@ -30999,6 +32944,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -31020,6 +32966,7 @@
"search_context_size_low": 0.01,
"search_context_size_medium": 0.01
},
+ "supports_adaptive_thinking": true,
"supports_assistant_prefill": false,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -31027,6 +32974,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -31048,6 +32996,7 @@
"search_context_size_low": 0.01,
"search_context_size_medium": 0.01
},
+ "supports_adaptive_thinking": true,
"supports_assistant_prefill": false,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -31055,6 +33004,7 @@
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
+ "supports_sampling_params": false,
"supports_tool_choice": true,
"supports_vision": true,
"supports_xhigh_reasoning_effort": true,
@@ -31062,6 +33012,7 @@
},
"vertex_ai/claude-opus-4@20250514": {
"cache_creation_input_token_cost": 0.00001875,
+ "cache_creation_input_token_cost_above_1hr": 0.00003,
"cache_read_input_token_cost": 0.0000015,
"input_cost_per_token": 0.000015,
"litellm_provider": "vertex_ai-anthropic_models",
@@ -31087,6 +33038,7 @@
},
"vertex_ai/claude-sonnet-4": {
"cache_creation_input_token_cost": 0.00000375,
+ "cache_creation_input_token_cost_above_1hr": 0.000006,
"cache_read_input_token_cost": 3e-7,
"input_cost_per_token": 0.000003,
"input_cost_per_token_above_200k_tokens": 0.000006,
@@ -31116,6 +33068,7 @@
},
"vertex_ai/claude-sonnet-4-5": {
"cache_creation_input_token_cost": 0.00000375,
+ "cache_creation_input_token_cost_above_1hr": 0.000006,
"cache_read_input_token_cost": 3e-7,
"input_cost_per_token": 0.000003,
"input_cost_per_token_above_200k_tokens": 0.000006,
@@ -31142,6 +33095,7 @@
},
"vertex_ai/claude-sonnet-4-5@20250929": {
"cache_creation_input_token_cost": 0.00000375,
+ "cache_creation_input_token_cost_above_1hr": 0.000006,
"cache_read_input_token_cost": 3e-7,
"input_cost_per_token": 0.000003,
"input_cost_per_token_above_200k_tokens": 0.000006,
@@ -31169,6 +33123,7 @@
},
"vertex_ai/claude-sonnet-4-6": {
"cache_creation_input_token_cost": 0.00000375,
+ "cache_creation_input_token_cost_above_1hr": 0.000006,
"cache_read_input_token_cost": 3e-7,
"input_cost_per_token": 0.000003,
"litellm_provider": "vertex_ai-anthropic_models",
@@ -31196,6 +33151,7 @@
},
"vertex_ai/claude-sonnet-4-6@default": {
"cache_creation_input_token_cost": 0.00000375,
+ "cache_creation_input_token_cost_above_1hr": 0.000006,
"cache_read_input_token_cost": 3e-7,
"input_cost_per_token": 0.000003,
"litellm_provider": "vertex_ai-anthropic_models",
@@ -31223,6 +33179,7 @@
},
"vertex_ai/claude-sonnet-4@20250514": {
"cache_creation_input_token_cost": 0.00000375,
+ "cache_creation_input_token_cost_above_1hr": 0.000006,
"cache_read_input_token_cost": 3e-7,
"input_cost_per_token": 0.000003,
"input_cost_per_token_above_200k_tokens": 0.000006,
@@ -31408,7 +33365,8 @@
"supports_url_context": true,
"supports_vision": true,
"supports_web_search": false,
- "tpm": 8000000
+ "tpm": 8000000,
+ "supports_image_size": false
},
"vertex_ai/gemini-3-flash-preview": {
"cache_read_input_token_cost": 5e-8,
@@ -31837,6 +33795,22 @@
},
"web_search_billing_unit": "per_query"
},
+ "vertex_ai/google/gemma-4-26b-a4b-it-maas": {
+ "input_cost_per_token": 1.5e-7,
+ "litellm_provider": "vertex_ai-openai_models",
+ "max_input_tokens": 256000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 6e-7,
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/maas/google/gemma-4-26b-a4b-it",
+ "supported_regions": [
+ "global"
+ ],
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
"vertex_ai/jamba-1.5": {
"input_cost_per_token": 2e-7,
"litellm_provider": "vertex_ai-ai21_models",
@@ -33190,7 +35164,8 @@
"supports_prompt_caching": true,
"supports_response_schema": false,
"supports_tool_choice": true,
- "supports_web_search": true
+ "supports_web_search": true,
+ "deprecation_date": "2026-05-15"
},
"xai/grok-3-beta": {
"cache_read_input_token_cost": 7.5e-7,
@@ -33388,7 +35363,8 @@
"supports_function_calling": true,
"supports_prompt_caching": true,
"supports_tool_choice": true,
- "supports_web_search": true
+ "supports_web_search": true,
+ "deprecation_date": "2026-05-15"
},
"xai/grok-4-1-fast": {
"cache_read_input_token_cost": 5e-8,
@@ -33429,7 +35405,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "supports_web_search": true
+ "supports_web_search": true,
+ "deprecation_date": "2026-05-15"
},
"xai/grok-4-1-fast-non-reasoning-latest": {
"cache_read_input_token_cost": 5e-8,
@@ -33449,7 +35426,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "supports_web_search": true
+ "supports_web_search": true,
+ "deprecation_date": "2026-05-15"
},
"xai/grok-4-1-fast-reasoning": {
"cache_read_input_token_cost": 5e-8,
@@ -33470,7 +35448,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "supports_web_search": true
+ "supports_web_search": true,
+ "deprecation_date": "2026-05-15"
},
"xai/grok-4-1-fast-reasoning-latest": {
"cache_read_input_token_cost": 5e-8,
@@ -33491,7 +35470,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "supports_web_search": true
+ "supports_web_search": true,
+ "deprecation_date": "2026-05-15"
},
"xai/grok-4-fast-non-reasoning": {
"cache_read_input_token_cost": 5e-8,
@@ -33508,7 +35488,8 @@
"supports_function_calling": true,
"supports_prompt_caching": true,
"supports_tool_choice": true,
- "supports_web_search": true
+ "supports_web_search": true,
+ "deprecation_date": "2026-05-15"
},
"xai/grok-4-fast-reasoning": {
"cache_read_input_token_cost": 5e-8,
@@ -33525,7 +35506,8 @@
"supports_function_calling": true,
"supports_prompt_caching": true,
"supports_tool_choice": true,
- "supports_web_search": true
+ "supports_web_search": true,
+ "deprecation_date": "2026-05-15"
},
"xai/grok-4-latest": {
"input_cost_per_token": 0.000003,
@@ -33692,7 +35674,8 @@
"supports_function_calling": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
- "supports_tool_choice": true
+ "supports_tool_choice": true,
+ "deprecation_date": "2026-05-15"
},
"xai/grok-code-fast-1-0825": {
"cache_read_input_token_cost": 2e-8,
@@ -33707,7 +35690,8 @@
"supports_function_calling": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
- "supports_tool_choice": true
+ "supports_tool_choice": true,
+ "deprecation_date": "2026-05-15"
},
"xai/grok-vision-beta": {
"input_cost_per_image": 0.000005,
diff --git a/cecli/scrape.py b/cecli/scrape.py
index 0722e11768d..390e3e9b295 100755
--- a/cecli/scrape.py
+++ b/cecli/scrape.py
@@ -220,7 +220,7 @@ def scrape_with_httpx(self, url):
return None, None
def try_pandoc(self):
- if self.pandoc_available:
+ if self.pandoc_available is not None:
return
try:
@@ -232,11 +232,9 @@ def try_pandoc(self):
try:
pypandoc.download_pandoc(delete_installer=True)
- except Exception as err:
- self.print_error(f"Unable to install pandoc: {err}")
- return
-
- self.pandoc_available = True
+ self.pandoc_available = True
+ except Exception:
+ self.pandoc_available = False
def html_to_markdown(self, page_source):
from bs4 import BeautifulSoup
diff --git a/cecli/tools/__init__.py b/cecli/tools/__init__.py
index 44e527cff37..4791865349d 100644
--- a/cecli/tools/__init__.py
+++ b/cecli/tools/__init__.py
@@ -6,7 +6,6 @@
_yield,
command,
command_interactive,
- context_manager,
delegate,
edit_text,
explore_code,
@@ -17,10 +16,9 @@
git_show,
git_status,
grep,
- load_skill,
ls,
read_range,
- remove_skill,
+ resource_manager,
thinking,
undo_change,
update_todo_list,
@@ -30,7 +28,6 @@
TOOL_MODULES = [
command,
command_interactive,
- context_manager,
delegate,
edit_text,
explore_code,
@@ -42,10 +39,9 @@
git_show,
git_status,
grep,
- load_skill,
ls,
read_range,
- remove_skill,
+ resource_manager,
thinking,
undo_change,
update_todo_list,
diff --git a/cecli/tools/command.py b/cecli/tools/command.py
index f09d32f2cdf..646fd46d5ba 100644
--- a/cecli/tools/command.py
+++ b/cecli/tools/command.py
@@ -3,6 +3,8 @@
import os
import platform
+import xxhash
+
from cecli.helpers.background_commands import BackgroundCommandManager
from cecli.run_cmd import run_cmd_subprocess
from cecli.tools.utils.base_tool import BaseTool
@@ -14,6 +16,7 @@
class Tool(BaseTool):
NORM_NAME = "command"
TRACK_INVOCATIONS = False
+ ALLOWED_SESSION_COMMANDS = {}
SCHEMA = {
"type": "function",
"function": {
@@ -70,6 +73,14 @@ def _parse_command_key(command):
return command.split("::", 1)[1].strip()
return None
+ @staticmethod
+ def _hash_command(command):
+ """Compute an xxhash of the full command text for session tracking."""
+ if not command:
+ return command
+
+ return xxhash.xxh64(command).hexdigest()
+
@classmethod
async def execute(
cls, coder, command, background=False, stop=None, stdin=None, pty=False, **kwargs
@@ -124,6 +135,8 @@ async def execute(
if not confirmed:
return "Command execution skipped by user."
+ command = coder.format_command_with_prefix(command)
+
# Determine timeout from agent_config (default: 30 seconds)
timeout = 0
if hasattr(coder, "agent_config"):
@@ -139,6 +152,15 @@ async def execute(
@classmethod
async def _get_confirmation(cls, coder, command_string, background):
"""Get user confirmation for command execution."""
+ # Hash command for dict key lookup
+ command_hash = cls._hash_command(command_string)
+
+ # Check if command is already handled for this session
+ if command_hash in cls.ALLOWED_SESSION_COMMANDS:
+ if cls.ALLOWED_SESSION_COMMANDS[command_hash]:
+ return True # Previously approved for session
+ # Previously declined - skip session question, continue to normal confirmation
+
if coder.skip_cli_confirmations:
return True
@@ -150,21 +172,32 @@ async def _get_confirmation(cls, coder, command_string, background):
if fnmatch.fnmatch(command_string, pattern):
return True
- command_string = coder.format_command_with_prefix(command_string)
+ formatted_command = coder.format_command_with_prefix(command_string)
if background:
prompt = "Allow execution of this background command?"
else:
prompt = "Allow execution of this command?"
- return await coder.io.confirm_ask(
+ confirmed = await coder.io.confirm_ask(
prompt,
- subject=command_string,
+ subject=formatted_command,
explicit_yes_required=True,
allow_never=True,
group_response="Command Tool",
)
+ if confirmed:
+ # Ask if user wants to allow for the entire session (only once per command)
+ if command_hash not in cls.ALLOWED_SESSION_COMMANDS:
+ session_allowed = await coder.io.confirm_ask(
+ "Allow this command for the rest of the session?",
+ subject=formatted_command,
+ )
+ cls.ALLOWED_SESSION_COMMANDS[command_hash] = session_allowed
+
+ return confirmed
+
@classmethod
async def _execute_background(cls, coder, command_string, use_pty=False):
"""
@@ -277,8 +310,8 @@ async def _execute_with_timeout(cls, coder, command_string, timeout, use_pty=Fal
output_content = (
f"[Large Response ({total_size} characters). "
"Output saved to paginated files.]\n"
- f"File Aliases (for use with ContextManager):\n{alias_list_str}\n"
- "Use the `ContextManager` tool to view these files."
+ f"File Aliases (for use with ResourceManager):\n{alias_list_str}\n"
+ "Use the `ResourceManager` tool to view these files."
"Do not use standard cli tools to view these files."
"Remove them from context after taking notes on the relevant information "
"to prevent overfilling stale context."
@@ -364,8 +397,8 @@ async def _execute_foreground(cls, coder, command_string):
output_content = (
f"[Large Response ({total_size} characters). "
"Output saved to paginated files.]\n"
- f"File Aliases (for use with ContextManager):\n{alias_list_str}\n"
- "Use the `ContextManager` tool to view these files."
+ f"File Aliases (for use with ResourceManager):\n{alias_list_str}\n"
+ "Use the `ResourceManager` tool to view these files."
"Do not use standard cli tools to view these files."
"Remove them from context after taking note of the relevant information "
"in the output to prevent overfilling stale context."
diff --git a/cecli/tools/command_interactive.py b/cecli/tools/command_interactive.py
index 30a27a60ec8..1a24a66093d 100644
--- a/cecli/tools/command_interactive.py
+++ b/cecli/tools/command_interactive.py
@@ -2,6 +2,8 @@
import asyncio
import fnmatch
+import xxhash
+
from cecli.run_cmd import run_cmd
from cecli.tools.utils.base_tool import BaseTool
@@ -9,6 +11,7 @@
class Tool(BaseTool):
NORM_NAME = "commandinteractive"
TRACK_INVOCATIONS = False
+ ALLOWED_SESSION_COMMANDS = {}
SCHEMA = {
"type": "function",
"function": {
@@ -36,40 +39,74 @@ def _is_command_allowed(coder, command_string):
"""Check if command matches any allowed_commands patterns."""
if hasattr(coder, "agent_config"):
allowed_commands = coder.agent_config.get("allowed_commands", [])
- for pattern in allowed_commands:
- if fnmatch.fnmatch(command_string, pattern):
- return True
-
+ if allowed_commands:
+ for pattern in allowed_commands:
+ if fnmatch.fnmatch(command_string, pattern):
+ return True
return False
+ @staticmethod
+ def _hash_command(command):
+ """Compute an xxhash of the full command text for session tracking."""
+ if not command:
+ return command
+
+ return xxhash.xxh64(command).hexdigest()
+
+ @classmethod
+ async def _get_confirmation(cls, coder, command_string):
+ """Get user confirmation for command execution."""
+ # Hash command for dict key lookup
+ command_hash = cls._hash_command(command_string)
+
+ # Check if command is already handled for this session
+ if command_hash in cls.ALLOWED_SESSION_COMMANDS:
+ if cls.ALLOWED_SESSION_COMMANDS[command_hash]:
+ return True # Previously approved for session
+ # Previously declined - skip session question, continue to normal confirmation
+
+ if coder.skip_cli_confirmations:
+ return True
+
+ # Check if command matches any allowed_commands patterns
+ if cls._is_command_allowed(coder, command_string):
+ return True
+
+ formatted_command = coder.format_command_with_prefix(command_string)
+
+ confirmed = await coder.io.confirm_ask(
+ "Allow execution of this command?",
+ subject=formatted_command,
+ explicit_yes_required=True,
+ allow_never=True,
+ group_response="Command Interactive Tool",
+ )
+
+ if not confirmed:
+ return False
+
+ # Ask if user wants to allow for the entire session (only once per command)
+ if command_hash not in cls.ALLOWED_SESSION_COMMANDS:
+ session_allowed = await coder.io.confirm_ask(
+ "Allow this command for the rest of the session?",
+ subject=formatted_command,
+ )
+ cls.ALLOWED_SESSION_COMMANDS[command_hash] = session_allowed
+
+ return True
+
@classmethod
async def execute(cls, coder, command_string, **kwargs):
"""
Execute an interactive shell command using run_cmd (which uses pexpect/PTY).
"""
try:
- command_string = coder.format_command_with_prefix(command_string)
-
- confirmed = (
- True
- if coder.skip_cli_confirmations or cls._is_command_allowed(coder, command_string)
- else await coder.io.confirm_ask(
- "Allow execution of this command?",
- subject=command_string,
- explicit_yes_required=True, # Require explicit 'yes' or 'always'
- allow_never=True, # Enable the 'Always' option
- group_response="Command Interactive Tool",
- )
- )
-
+ confirmed = await cls._get_confirmation(coder, command_string)
if not confirmed:
- # This happens if the user explicitly says 'no' this time.
- # If 'Always' was chosen previously, confirm_ask returns True directly.
- coder.io.tool_output(
- f"Skipped execution of shell command: {command_string}", type="tool-result"
- )
return "Shell command execution skipped by user."
+ command_string = coder.format_command_with_prefix(command_string)
+
coder.io.tool_output(
f"⛭ Starting interactive shell command: {command_string}", type="tool-result"
)
@@ -86,7 +123,6 @@ def _run_interactive():
)
if tui:
- # Notify user and suspend TUI for interactive command
coder.io.tool_output(
">>> Suspending TUI for interactive command <<<", type="tool-result"
)
@@ -107,10 +143,8 @@ def _run_interactive():
# Format the output for the result message, include more content
output_content = combined_output or ""
- # Use the existing token threshold constant as the character limit for truncation
output_limit = coder.large_file_token_threshold
if coder.context_management_enabled and len(output_content) > output_limit:
- # Truncate and add a clear message using the constant value
output_content = (
output_content[:output_limit]
+ f"\n... (output truncated at {output_limit} characters, based on"
diff --git a/cecli/tools/context_manager.py b/cecli/tools/context_manager.py
deleted file mode 100644
index c5883f97e16..00000000000
--- a/cecli/tools/context_manager.py
+++ /dev/null
@@ -1,308 +0,0 @@
-import os
-import re
-import time
-
-from cecli.helpers.background_commands import BackgroundCommandManager
-from cecli.tools.utils.base_tool import BaseTool
-from cecli.tools.utils.helpers import ToolError, parse_arg_as_list
-from cecli.tools.utils.output import color_markers, tool_footer, tool_header
-from cecli.tools.validations import ToolValidations
-
-
-class Tool(BaseTool):
- NORM_NAME = "contextmanager"
- SCHEMA = {
- "type": "function",
- "function": {
- "name": "ContextManager",
- "description": (
- "Manage multiple files in the chat context: add, read_only, create, and remove."
- " Accepts arrays of file paths for each operation."
- ),
- "parameters": {
- "type": "object",
- "properties": {
- "add": {
- "type": "array",
- "items": {"type": "string"},
- "description": (
- "List of file paths to add to context. Limit to at most 2 at a time."
- ),
- },
- "read_only": {
- "type": "array",
- "items": {"type": "string"},
- "description": (
- "List of file paths to add as read-only. Limit to at most 2 at a time."
- ),
- },
- "create": {
- "type": "array",
- "items": {"type": "string"},
- "description": "List of file paths to create.",
- },
- "remove": {
- "type": "array",
- "items": {"type": "string"},
- "description": "List of file paths to remove from context.",
- },
- "stop": {
- "type": "array",
- "items": {"type": "string"},
- "description": "List of command keys to stop background commands for.",
- },
- },
- "additionalProperties": False,
- "required": [],
- },
- },
- }
-
- @classmethod
- def execute(
- cls, coder, remove=None, add=None, read_only=None, create=None, stop=None, **kwargs
- ):
- """Perform batch operations on the coder's context.
-
- Parameters
- ----------
- coder: Coder instance
- The active coder handling file context.
- remove: list[str] | None
- Files to remove from the context.
- add: list[str] | None
- Files to promote to editable status.
- view: list[str] | None
- Files to add as read-only view.
- create: list[str] | None
- Files to create and make editable.
- """
- remove_files = sorted(parse_arg_as_list(remove), key=cls._natural_sort_key)
- editable_files = sorted(parse_arg_as_list(add), key=cls._natural_sort_key)
- view_files = sorted(parse_arg_as_list(read_only), key=cls._natural_sort_key)
- create_files = sorted(parse_arg_as_list(create), key=cls._natural_sort_key)
- stop_keys = sorted(parse_arg_as_list(stop), key=cls._natural_sort_key)
-
- if (
- not remove_files
- and not editable_files
- and not view_files
- and not create_files
- and not stop_keys
- ):
- raise ToolError(
- "You must specify at least one of: remove, editable, view, create, or stop"
- )
-
- coder.io.tool_output("⛭ Modifying Context", type="tool-result")
- messages = []
-
- for f in create_files:
- messages.append(cls._create(coder, f))
- for f in remove_files:
- messages.append(cls._remove(coder, f))
- for f in view_files:
- messages.append(cls._view(coder, f))
- for f in editable_files:
- messages.append(cls._editable(coder, f))
- for key in stop_keys:
- messages.append(cls._stop_command(coder, key))
-
- if coder.tui and coder.tui():
- coder.tui().refresh()
-
- coder.context_blocks_cache = {}
- coder.edit_allowed = True
-
- return "\n".join(messages)
-
- @classmethod
- def format_output(cls, coder, mcp_server, tool_response):
- """Format output for ContextManager tool."""
- color_start, color_end = color_markers(coder)
-
- # Output header
- tool_header(coder=coder, mcp_server=mcp_server, tool_response=tool_response)
-
- try:
- params = ToolValidations.validate_params(
- tool_response.function.arguments, cls.VALIDATIONS, cls.SCHEMA
- )
- except ToolError:
- coder.io.tool_error("Invalid Tool JSON")
- return
-
- # Define action display names
- action_names = {
- "create": "create",
- "remove": "remove",
- "view": "view",
- "editable": "editable",
- "stop": "stop",
- }
-
- # Output each action with comma-separated file list
- for action_key, display_name in action_names.items():
- files = sorted(parse_arg_as_list(params.get(action_key)), key=cls._natural_sort_key)
- if files:
- file_list = ", ".join(files)
- coder.io.tool_output(f"{color_start}{display_name}:{color_end} {file_list}")
-
- tool_footer(coder=coder, tool_response=tool_response, params=params)
-
- @classmethod
- def _remove(cls, coder, file_path):
- """Remove a file from the coder's context."""
- from cecli.helpers.conversation import ConversationService
-
- try:
- abs_path = cls._resolve_file_path(coder, file_path)
- rel_path = coder.get_rel_fname(abs_path)
- removed = False
-
- if abs_path in coder.abs_fnames:
- coder.abs_fnames.remove(abs_path)
- removed = True
-
- if abs_path in coder.abs_read_only_fnames:
- coder.abs_read_only_fnames.remove(abs_path)
- removed = True
-
- if not removed:
- coder.io.tool_output(f"⚠ File '{file_path}' not in context", type="tool-result")
- return f"File not in context: {file_path}"
-
- coder.recently_removed[rel_path] = {"removed_at": time.time()}
-
- if not file_path.startswith("command_key::"):
- ConversationService.get_chunks(coder).defer_removal(abs_path)
- ConversationService.get_chunks(coder).defer_removal(rel_path)
-
- coder.io.tool_output(f"✗ Removed '{file_path}' from context", type="tool-result")
- return (
- f"Removed: {file_path}\n"
- "Old file contents may remain visible. This is an acceptable system behavior."
- )
- except Exception as e:
- coder.io.tool_error(f"Error removing file '{file_path}': {str(e)}")
- return f"Error removing {file_path}: {e}"
-
- @classmethod
- def _stop_command(cls, coder, command_key):
- """Stop a background command by its command key."""
- try:
- success, output, exit_code = BackgroundCommandManager.stop_background_command(
- command_key
- )
- if success:
- coder.io.tool_output(
- f"✗ Stopped background command '{command_key}'", type="tool-result"
- )
- return (
- f"Background command stopped: {command_key}\n"
- f"Exit code: {exit_code}\n"
- f"Final output:\n{output}"
- )
- else:
- coder.io.tool_output(
- f"⚠ Background command '{command_key}' not found or not running",
- type="tool-result",
- )
- return f"Command not found or not running: {command_key}"
- except Exception as e:
- coder.io.tool_error(f"Error stopping command '{command_key}': {str(e)}")
- return f"Error stopping {command_key}: {e}"
-
- @classmethod
- def _editable(cls, coder, file_path):
- """Make a file editable in the coder's context."""
- try:
- abs_path = cls._resolve_file_path(coder, file_path)
- if abs_path in coder.abs_fnames:
- coder.io.tool_output(
- f"🗀 File '{file_path}' is already editable", type="tool-result"
- )
- return f"Already editable: {file_path}"
- if not os.path.isfile(abs_path):
- coder.io.tool_output(f"⚠ File '{file_path}' not found on disk", type="tool-result")
- return f"File not found: {file_path}"
- was_read_only = False
- if abs_path in coder.abs_read_only_fnames:
- coder.abs_read_only_fnames.remove(abs_path)
- was_read_only = True
- coder.abs_fnames.add(abs_path)
- if was_read_only:
- coder.io.tool_output(
- f"🗀 Moved '{file_path}' from read-only to editable", type="tool-result"
- )
- return f"Made editable (moved): {file_path}"
- else:
- coder.io.tool_output(
- f"🗀 Added '{file_path}' directly to editable context", type="tool-result"
- )
- return f"Made editable (added): {file_path}"
- except Exception as e:
- coder.io.tool_error(f"Error making editable '{file_path}': {str(e)}")
- return f"Error making editable {file_path}: {e}"
-
- @classmethod
- def _view(cls, coder, file_path):
- """View a file (add as read‑only) in the coder's context."""
- try:
- resolved_path = cls._resolve_file_path(coder, file_path)
- return coder._add_file_to_context(resolved_path, explicit=True)
- except Exception as e:
- coder.io.tool_error(f"Error viewing file '{file_path}': {str(e)}")
- return f"Error viewing {file_path}: {e}"
-
- @classmethod
- def _create(cls, coder, file_path):
- """Create a new file on the file system and make it editable in the coder's context."""
- try:
- abs_path = coder.abs_root_path(file_path)
-
- # Check if file already exists
- if os.path.exists(abs_path):
- coder.io.tool_output(f"⚠ File '{file_path}' already exists", type="tool-result")
- return f"File already exists: {file_path}"
-
- # Create parent directories if they don't exist
- os.makedirs(os.path.dirname(abs_path), exist_ok=True)
-
- # Create an empty file
- with open(abs_path, "w", encoding="utf-8"):
- pass
-
- # Add the file to editable context
- coder.abs_fnames.add(abs_path)
-
- coder.io.tool_output(
- f"🗀 Created '{file_path}' and made it editable", type="tool-result"
- )
- return f"Created and made editable: {file_path}"
-
- except Exception as e:
- coder.io.tool_error(f"Error creating file '{file_path}': {str(e)}")
- return f"Error creating {file_path}: {e}"
-
- @classmethod
- def _resolve_file_path(cls, coder, file_path):
- """Resolve a file path, handling command_key:: aliases.
-
- command_key::{command_key}/{filename} resolves to the actual
- file path under the agent's local agent folder.
- """
- if file_path.startswith("command_key::"):
- alias_path = file_path[len("command_key::") :]
- parts = alias_path.split("/", 1)
- if len(parts) == 2:
- command_key = parts[0]
- filename = parts[1]
- rel_path = coder.local_agent_folder(f"{command_key}/{filename}")
- return coder.abs_root_path(rel_path)
- return coder.abs_root_path(file_path)
-
- @classmethod
- def _natural_sort_key(cls, s: str) -> list:
- """Natural sort key that splits "a10b2" into ["a", 10, "b", 2]."""
- return [int(text) if text.isdigit() else text.lower() for text in re.split(r"(\d+)", s)]
diff --git a/cecli/tools/edit_text.py b/cecli/tools/edit_text.py
index a22e96dcd57..15dfb18730f 100644
--- a/cecli/tools/edit_text.py
+++ b/cecli/tools/edit_text.py
@@ -88,7 +88,13 @@ class Tool(BaseTool):
),
},
},
- "required": ["file_path"],
+ "required": [
+ "file_path",
+ "operation",
+ "start_line",
+ "end_line",
+ "text",
+ ],
},
"description": "Array of edits to apply.",
},
@@ -181,12 +187,18 @@ def execute(
)
edit_text_raw = edit.get("text")
- edit_text = (
- strip_hashline(edit_text_raw) if edit_text_raw is not None else None
- )
+ edit_text = edit.get("text")
edit_start_line = edit.get("start_line")
edit_end_line = edit.get("end_line")
+ if edit_text_raw is not None:
+ edit_text_raw = strip_hashline(edit_text_raw)
+ while edit_text_raw != edit_text:
+ edit_text_raw = strip_hashline(edit_text_raw)
+ edit_text = strip_hashline(edit_text)
+
+ edit_text = edit_text_raw
+
# Try to resolve line content values to content IDs
# This handles cases where LLMs pass actual line content
# instead of content ID markers
@@ -437,7 +449,7 @@ def format_output(cls, coder, mcp_server, tool_response):
start_line = edit.get("start_line")
end_line = edit.get("end_line")
# Show output based on operation type
- if operation == "replace":
+ if operation in ("replace", "delete"):
# Show diff for replace operations
diff_output = ""
@@ -451,7 +463,7 @@ def format_output(cls, coder, mcp_server, tool_response):
original_content=strip_hashline(original_content),
start_line_hash=start_line,
end_line_hash=end_line,
- operation="replace",
+ operation=operation,
text=strip_hashline(text),
)
except ContentHashError as e:
@@ -469,14 +481,4 @@ def format_output(cls, coder, mcp_server, tool_response):
coder.io.tool_output(text)
coder.io.tool_output("")
- elif operation == "delete":
- # Show deletion summary
- range_info = (
- f"Deleted {start_line} - {end_line}"
- if start_line and end_line
- else "specified range"
- )
- coder.io.tool_output(range_info)
- coder.io.tool_output("")
-
tool_footer(coder=coder, tool_response=tool_response, params=params)
diff --git a/cecli/tools/load_skill.py b/cecli/tools/load_skill.py
deleted file mode 100644
index f59beea940f..00000000000
--- a/cecli/tools/load_skill.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from cecli.tools.utils.base_tool import BaseTool
-
-
-class Tool(BaseTool):
- NORM_NAME = "loadskill"
- SCHEMA = {
- "type": "function",
- "function": {
- "name": "LoadSkill",
- "description": "Load a skill by name.",
- "parameters": {
- "type": "object",
- "properties": {
- "skill_name": {
- "type": "string",
- "description": "Name of the skill to load",
- },
- },
- "required": ["skill_name"],
- },
- },
- }
-
- @classmethod
- def execute(cls, coder, skill_name, **kwargs):
- """
- Load a skill by name (agent mode only).
- """
- if not skill_name:
- return "Error: Skill name is required."
-
- # Check if we're in agent mode
- if not hasattr(coder, "edit_format") or coder.edit_format not in ("agent", "subagent"):
- return "Error: Skill loading is only available in agent mode."
-
- # Check if skills_manager is available
- if not hasattr(coder, "skills_manager") or coder.skills_manager is None:
- error_msg = "Error: Skills manager is not initialized. Skills may not be configured."
- # Check if skills directories are configured
- if hasattr(coder, "skills_directory_paths") and not coder.skills_directory_paths:
- error_msg += (
- "\nNo skills directories configured. Use --skills-paths to configure skill"
- " directories."
- )
- return error_msg
-
- # Use the instance method on skills_manager
- return coder.skills_manager.load_skill(skill_name)
diff --git a/cecli/tools/read_range.py b/cecli/tools/read_range.py
index cba2bfd6f29..67a46090c4b 100644
--- a/cecli/tools/read_range.py
+++ b/cecli/tools/read_range.py
@@ -212,7 +212,7 @@ def execute(cls, coder, read, **kwargs):
f"File {rel_path} is empty.",
(
"Next: use EditText with start_line @000 and end_line @000 to"
- " write content, or ContextManager to scaffold — do not call"
+ " write content, or ResourceManager to scaffold — do not call"
" ReadRange again on this empty file."
),
]
@@ -711,7 +711,7 @@ def format_model_response(cls, coder, rel_path, s_idx, e_idx, hashed_lines, curr
if start_found or end_found:
if start_found:
lines.append(
- f"File {rel_path} Snapshot (Lines {start_stub_s + 1} - {start_stub_e + 1}):"
+ f"File {rel_path} Current Snapshot (Lines {start_stub_s + 1} - {start_stub_e + 1}):"
)
lines.extend(hashed_lines[start_stub_s:start_stub_e])
@@ -723,7 +723,7 @@ def format_model_response(cls, coder, rel_path, s_idx, e_idx, hashed_lines, curr
):
lines.append("...⋮...")
lines.append(
- f"File {rel_path} Snapshot (Lines {end_stub_s + 1} - {end_stub_e + 1}):"
+ f"File {rel_path} Current Snapshot (Lines {end_stub_s + 1} - {end_stub_e + 1}):"
)
lines.extend(hashed_lines[end_stub_s:end_stub_e])
@@ -732,14 +732,21 @@ def format_model_response(cls, coder, rel_path, s_idx, e_idx, hashed_lines, curr
except Exception:
pass
- lines = [f"File {rel_path} Snapshot (Lines {s_idx + 1} - {e_idx + 1}):"]
+ lines = [f"File {rel_path} Current Snapshot (Lines {s_idx + 1} - {e_idx + 1}):"]
total = e_idx - s_idx
- if total <= 15:
+ hashed_content = "\n".join(hashed_lines[s_idx : e_idx + 1])
+ token_count = coder.main_model.token_count(hashed_content)
+
+ if token_count <= min(coder.large_file_token_threshold / 16, 512):
lines.extend(hashed_lines[s_idx : e_idx + 1])
else:
- lines.extend(hashed_lines[s_idx : s_idx + 5])
- lines.append("...⋮...")
- lines.extend(hashed_lines[e_idx - 4 : e_idx + 1])
+ if total <= 15:
+ lines.extend(hashed_lines[s_idx : e_idx + 1])
+ else:
+ lines.extend(hashed_lines[s_idx : s_idx + 5])
+ lines.append("...⋮...")
+ lines.extend(hashed_lines[e_idx - 4 : e_idx + 1])
+
lines.append("")
return "\n".join(lines)
diff --git a/cecli/tools/remove_skill.py b/cecli/tools/remove_skill.py
deleted file mode 100644
index a4e0e72eed9..00000000000
--- a/cecli/tools/remove_skill.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from cecli.tools.utils.base_tool import BaseTool
-
-
-class Tool(BaseTool):
- NORM_NAME = "removeskill"
- SCHEMA = {
- "type": "function",
- "function": {
- "name": "RemoveSkill",
- "description": "Remove a skill by name.",
- "parameters": {
- "type": "object",
- "properties": {
- "skill_name": {
- "type": "string",
- "description": "Name of the skill to remove",
- },
- },
- "required": ["skill_name"],
- },
- },
- }
-
- @classmethod
- def execute(cls, coder, skill_name, **kwargs):
- """
- Remove a skill by name (agent mode only).
- """
- if not skill_name:
- return "Error: Skill name is required."
-
- # Check if we're in agent mode
- if not hasattr(coder, "edit_format") or coder.edit_format not in ("agent", "subagent"):
- return "Error: Skill removal is only available in agent mode."
-
- # Check if skills_manager is available
- if not hasattr(coder, "skills_manager") or coder.skills_manager is None:
- error_msg = "Error: Skills manager is not initialized. Skills may not be configured."
- # Check if skills directories are configured
- if hasattr(coder, "skills_directory_paths") and not coder.skills_directory_paths:
- error_msg += (
- "\nNo skills directories configured. Use --skills-paths to configure skill"
- " directories."
- )
- return error_msg
-
- # Use the instance method on skills_manager
- return coder.skills_manager.remove_skill(skill_name)
diff --git a/cecli/tools/resource_manager.py b/cecli/tools/resource_manager.py
new file mode 100644
index 00000000000..f9c634ffc01
--- /dev/null
+++ b/cecli/tools/resource_manager.py
@@ -0,0 +1,625 @@
+import os
+import re
+import time
+
+from cecli.commands.utils.helpers import (
+ is_server_globally_excluded,
+ iter_all_coders,
+ update_server_registration,
+)
+from cecli.helpers.background_commands import BackgroundCommandManager
+from cecli.tools.utils.base_tool import BaseTool
+from cecli.tools.utils.helpers import ToolError, parse_arg_as_list
+from cecli.tools.utils.output import color_markers, tool_footer, tool_header
+from cecli.tools.validations import ToolValidations
+
+
+class Tool(BaseTool):
+ NORM_NAME = "resourcemanager"
+ SCHEMA = {
+ "type": "function",
+ "function": {
+ "name": "ResourceManager",
+ "description": (
+ "Manage files, long running commands, skills, and MCP servers"
+ " in the chat context: add, read_only, create, remove files;"
+ " stop background commands; load/remove skills and load/remove MCP servers."
+ ),
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "add": {
+ "type": "array",
+ "items": {"type": "string"},
+ "description": (
+ "List of file paths to add to context. Limit to at most 2 at a time."
+ ),
+ },
+ "read_only": {
+ "type": "array",
+ "items": {"type": "string"},
+ "description": (
+ "List of file paths to add as read-only. Limit to at most 2 at a time."
+ ),
+ },
+ "create": {
+ "type": "array",
+ "items": {"type": "string"},
+ "description": "List of file paths to create.",
+ },
+ "remove": {
+ "type": "array",
+ "items": {"type": "string"},
+ "description": "List of file paths to remove from context.",
+ },
+ "stop": {
+ "type": "array",
+ "items": {"type": "string"},
+ "description": "List of command keys to stop background commands for.",
+ },
+ "load_skill": {
+ "type": "array",
+ "items": {"type": "string"},
+ "description": "List of skill names to load.",
+ },
+ "remove_skill": {
+ "type": "array",
+ "items": {"type": "string"},
+ "description": "List of skill names to remove.",
+ },
+ "load_mcp": {
+ "type": "array",
+ "items": {"type": "string"},
+ "description": (
+ "List of MCP server names to load. Use '*' to load all enabled servers."
+ ),
+ },
+ "remove_mcp": {
+ "type": "array",
+ "items": {"type": "string"},
+ "description": (
+ "List of MCP server names to remove. Use '*' to remove all connected servers."
+ ),
+ },
+ "actions": {
+ "type": "array",
+ "items": {"type": "string", "enum": ["list_mcp_servers"]},
+ "description": (
+ "List of action operations to perform. "
+ 'Possible values: "list_mcp_servers" to list MCP servers.'
+ ),
+ },
+ },
+ "additionalProperties": False,
+ "required": [],
+ },
+ },
+ }
+
+    @classmethod
+    async def execute(
+        cls,
+        coder,
+        remove=None,
+        add=None,
+        read_only=None,
+        create=None,
+        stop=None,
+        load_skill=None,
+        remove_skill=None,
+        load_mcp=None,
+        remove_mcp=None,
+        actions=None,
+        **kwargs,
+    ):
+        """Perform batch operations on the coder's context.
+
+        Every argument is normalised with ``parse_arg_as_list`` and handled in
+        natural-sort order. Operation groups run in a fixed sequence: create,
+        remove, read_only, add, stop, load_skill, remove_skill, load_mcp,
+        remove_mcp, actions.
+
+        Parameters
+        ----------
+        coder: Coder instance
+            The active coder handling file context.
+        remove: list[str] | None
+            Files to remove from the context.
+        add: list[str] | None
+            Files to make editable. A path that resolves but is not a file on
+            disk is routed to the create operation instead.
+        read_only: list[str] | None
+            Files to add as read-only.
+        create: list[str] | None
+            Files to create and make editable.
+        stop: list[str] | None
+            Command keys to stop background commands for.
+        load_skill: list[str] | None
+            Skill names to load.
+        remove_skill: list[str] | None
+            Skill names to remove.
+        load_mcp: list[str] | None
+            MCP server names to load. "*" expands to every configured server
+            that is enabled and not yet connected.
+        remove_mcp: list[str] | None
+            MCP server names to remove. "*" expands to every connected server.
+        actions: list[str] | None
+            Action operations to perform; only "list_mcp_servers" is recognised.
+        **kwargs:
+            Accepted and ignored.
+
+        Returns
+        -------
+        str
+            The per-operation result messages joined with newlines.
+
+        Raises
+        ------
+        ToolError
+            If every operation list is empty.
+        """
+        remove_files = sorted(parse_arg_as_list(remove), key=cls._natural_sort_key)
+        editable_files = sorted(parse_arg_as_list(add), key=cls._natural_sort_key)
+        view_files = sorted(parse_arg_as_list(read_only), key=cls._natural_sort_key)
+        create_files = sorted(parse_arg_as_list(create), key=cls._natural_sort_key)
+        stop_keys = sorted(parse_arg_as_list(stop), key=cls._natural_sort_key)
+        load_skill_names = sorted(parse_arg_as_list(load_skill), key=cls._natural_sort_key)
+        remove_skill_names = sorted(parse_arg_as_list(remove_skill), key=cls._natural_sort_key)
+        load_mcp_servers = sorted(parse_arg_as_list(load_mcp), key=cls._natural_sort_key)
+        remove_mcp_servers = sorted(parse_arg_as_list(remove_mcp), key=cls._natural_sort_key)
+        action_operations = sorted(parse_arg_as_list(actions), key=cls._natural_sort_key)
+
+        requested = (
+            remove_files,
+            editable_files,
+            view_files,
+            create_files,
+            stop_keys,
+            load_skill_names,
+            remove_skill_names,
+            load_mcp_servers,
+            remove_mcp_servers,
+            action_operations,
+        )
+        if not any(requested):
+            # Name the parameters exactly as the schema exposes them.
+            raise ToolError(
+                "You must specify at least one of: remove, add, read_only, create, stop, "
+                "load_skill, remove_skill, load_mcp, remove_mcp, or actions"
+            )
+
+        def connected_server_names():
+            # connected_servers is handled both as a dict keyed by name and as
+            # an iterable whose entries are server objects or plain names.
+            connected = coder.mcp_manager.connected_servers
+            if isinstance(connected, dict):
+                return list(connected.keys())
+            return [getattr(s, "name", s) for s in connected]
+
+        coder.io.tool_output("\u2b6d Modifying Context", type="tool-result")
+        messages = []
+
+        # Expand wildcards for MCP operations
+        if "*" in load_mcp_servers and coder.mcp_manager:
+            already_connected = set(connected_server_names())
+            load_mcp_servers = [
+                s.name
+                for s in (coder.mcp_manager.servers or [])
+                if s.name not in already_connected and s.config.get("enabled", True)
+            ]
+        if "*" in remove_mcp_servers and coder.mcp_manager:
+            remove_mcp_servers = connected_server_names()
+
+        # Before connecting any new MCP server, convert coders with empty
+        # included sets to explicit include lists.
+        if load_mcp_servers and coder.mcp_manager:
+            connected_names = set(connected_server_names())
+            if connected_names:
+                for c in iter_all_coders(coder):
+                    if not c.registered_servers["included"]:
+                        included = connected_names - c.registered_servers["excluded"]
+                        if c.edit_format in ("agent", "subagent"):
+                            included.add("Local")
+                        c.registered_servers["included"] = included
+
+        for f in create_files:
+            messages.append(cls._create(coder, f))
+        for f in remove_files:
+            messages.append(cls._remove(coder, f))
+        for f in view_files:
+            messages.append(cls._view(coder, f))
+        for f in editable_files:
+            try:
+                # Resolve through the alias-aware helper so that
+                # command_key:: paths are checked at their real location
+                # instead of being mistaken for missing files.
+                abs_path = cls._resolve_file_path(coder, f)
+            except Exception:
+                abs_path = None
+            if abs_path is not None and not os.path.isfile(abs_path):
+                coder.io.tool_output(f"ℹ️ `{f}` missing on disk — using **create** instead of add")
+                messages.append(cls._create(coder, f))
+            else:
+                messages.append(cls._editable(coder, f))
+        for key in stop_keys:
+            messages.append(cls._stop_command(coder, key))
+        for skill_name in load_skill_names:
+            messages.append(cls._load_skill(coder, skill_name))
+        for skill_name in remove_skill_names:
+            messages.append(cls._remove_skill(coder, skill_name))
+        for server_name in load_mcp_servers:
+            messages.append(await cls._load_mcp(coder, server_name))
+        for server_name in remove_mcp_servers:
+            messages.append(await cls._remove_mcp(coder, server_name))
+
+        for action_name in action_operations:
+            # Dispatch on the action name rather than listing servers for
+            # whatever string was passed.
+            if action_name == "list_mcp_servers":
+                messages.append(await cls._list_mcp_servers(coder))
+            else:
+                messages.append(f"Unknown action: {action_name}")
+
+        # coder.tui, when present, is called to obtain the TUI object.
+        tui = getattr(coder, "tui", None)
+        tui_app = tui() if tui else None
+        if tui_app:
+            tui_app.refresh()
+
+        coder.context_blocks_cache = {}
+        coder.edit_allowed = True
+
+        return "\n".join(messages)
+
+    @classmethod
+    def format_output(cls, coder, mcp_server, tool_response):
+        """Print a summary of a ResourceManager tool call.
+
+        Writes the tool header, then one ``<operation>: a, b, c`` line for each
+        operation that was given at least one value (values in natural-sort
+        order), then the tool footer. If the call arguments fail validation,
+        an "Invalid Tool JSON" error is reported and nothing else is printed
+        after the header.
+        """
+        color_start, color_end = color_markers(coder)
+
+        # Output header
+        tool_header(coder=coder, mcp_server=mcp_server, tool_response=tool_response)
+
+        try:
+            params = ToolValidations.validate_params(
+                tool_response.function.arguments, cls.VALIDATIONS, cls.SCHEMA
+            )
+        except ToolError:
+            coder.io.tool_error("Invalid Tool JSON")
+            return
+
+        # These keys must match the SCHEMA property names; looking up any
+        # other key silently hides that operation from the output.
+        operation_keys = (
+            "create",
+            "remove",
+            "read_only",
+            "add",
+            "stop",
+            "load_skill",
+            "remove_skill",
+            "load_mcp",
+            "remove_mcp",
+            "actions",
+        )
+
+        # Output each operation with its comma-separated value list
+        for operation in operation_keys:
+            values = sorted(parse_arg_as_list(params.get(operation)), key=cls._natural_sort_key)
+            if values:
+                value_list = ", ".join(values)
+                coder.io.tool_output(f"{color_start}{operation}:{color_end} {value_list}")
+
+        tool_footer(coder=coder, tool_response=tool_response, params=params)
+
+    @classmethod
+    def _remove(cls, coder, file_path):
+        """Drop a file from the coder's editable and read-only sets.
+
+        Returns a status message. Any exception is reported through
+        ``coder.io.tool_error`` and converted into an error string.
+        """
+        from cecli.helpers.conversation import ConversationService
+
+        try:
+            target = cls._resolve_file_path(coder, file_path)
+            relative = coder.get_rel_fname(target)
+
+            was_tracked = False
+            for tracked in (coder.abs_fnames, coder.abs_read_only_fnames):
+                if target in tracked:
+                    tracked.remove(target)
+                    was_tracked = True
+
+            if not was_tracked:
+                coder.io.tool_output(f"⚠ File '{file_path}' not in context", type="tool-result")
+                return f"File not in context: {file_path}"
+
+            # Record when the file left the context.
+            coder.recently_removed[relative] = {"removed_at": time.time()}
+
+            # command_key:: aliases skip the deferred conversation-chunk removal.
+            if not file_path.startswith("command_key::"):
+                for chunk_key in (target, relative):
+                    ConversationService.get_chunks(coder).defer_removal(chunk_key)
+
+            coder.io.tool_output(f"✗ Removed '{file_path}' from context", type="tool-result")
+            return (
+                f"Removed: {file_path}\n"
+                "Old file contents may remain visible. This is an acceptable system behavior."
+            )
+        except Exception as exc:
+            coder.io.tool_error(f"Error removing file '{file_path}': {str(exc)}")
+            return f"Error removing {file_path}: {exc}"
+
+    @classmethod
+    def _stop_command(cls, coder, command_key):
+        """Ask BackgroundCommandManager to stop the command stored under ``command_key``.
+
+        Returns a report containing the exit code and final output on success,
+        a "not found or not running" message otherwise, or an error string if
+        the stop call raises.
+        """
+        try:
+            stopped, final_output, code = BackgroundCommandManager.stop_background_command(
+                command_key
+            )
+            if not stopped:
+                coder.io.tool_output(
+                    f"⚠ Background command '{command_key}' not found or not running",
+                    type="tool-result",
+                )
+                return f"Command not found or not running: {command_key}"
+
+            coder.io.tool_output(
+                f"✗ Stopped background command '{command_key}'", type="tool-result"
+            )
+            report = [
+                f"Background command stopped: {command_key}",
+                f"Exit code: {code}",
+                f"Final output:\n{final_output}",
+            ]
+            return "\n".join(report)
+        except Exception as exc:
+            coder.io.tool_error(f"Error stopping command '{command_key}': {str(exc)}")
+            return f"Error stopping {command_key}: {exc}"
+
+    @classmethod
+    def _editable(cls, coder, file_path):
+        """Put a file into the editable set, moving it out of read-only if it was there.
+
+        Returns a status message: already editable, not found on disk, moved
+        from read-only, or added directly. Exceptions become an error string.
+        """
+        try:
+            target = cls._resolve_file_path(coder, file_path)
+
+            if target in coder.abs_fnames:
+                coder.io.tool_output(
+                    f"🗀 File '{file_path}' is already editable", type="tool-result"
+                )
+                return f"Already editable: {file_path}"
+
+            if not os.path.isfile(target):
+                coder.io.tool_output(f"⚠ File '{file_path}' not found on disk", type="tool-result")
+                return f"File not found: {file_path}"
+
+            promoted = target in coder.abs_read_only_fnames
+            if promoted:
+                coder.abs_read_only_fnames.remove(target)
+            coder.abs_fnames.add(target)
+
+            notice = (
+                f"🗀 Moved '{file_path}' from read-only to editable"
+                if promoted
+                else f"🗀 Added '{file_path}' directly to editable context"
+            )
+            coder.io.tool_output(notice, type="tool-result")
+            return f"Made editable ({'moved' if promoted else 'added'}): {file_path}"
+        except Exception as exc:
+            coder.io.tool_error(f"Error making editable '{file_path}': {str(exc)}")
+            return f"Error making editable {file_path}: {exc}"
+
+    @classmethod
+    def _view(cls, coder, file_path):
+        """Resolve ``file_path`` and hand it to ``coder._add_file_to_context`` with ``explicit=True``.
+
+        Returns whatever that call returns, or an error string if it raises.
+        """
+        try:
+            return coder._add_file_to_context(
+                cls._resolve_file_path(coder, file_path), explicit=True
+            )
+        except Exception as exc:
+            coder.io.tool_error(f"Error viewing file '{file_path}': {str(exc)}")
+            return f"Error viewing {file_path}: {exc}"
+
+    @classmethod
+    def _create(cls, coder, file_path):
+        """Create an empty file on disk and register it as editable.
+
+        Nothing is written when the path already exists. Missing parent
+        directories are created. Exceptions become an error string.
+        """
+        try:
+            target = coder.abs_root_path(file_path)
+
+            # Leave existing paths alone.
+            if os.path.exists(target):
+                coder.io.tool_output(f"⚠ File '{file_path}' already exists", type="tool-result")
+                return f"File already exists: {file_path}"
+
+            parent_dir = os.path.dirname(target)
+            os.makedirs(parent_dir, exist_ok=True)
+
+            # Opening for write and closing straight away leaves an empty file.
+            with open(target, "w", encoding="utf-8"):
+                pass
+
+            coder.abs_fnames.add(target)
+
+            coder.io.tool_output(
+                f"🗀 Created '{file_path}' and made it editable", type="tool-result"
+            )
+            return f"Created and made editable: {file_path}"
+
+        except Exception as exc:
+            coder.io.tool_error(f"Error creating file '{file_path}': {str(exc)}")
+            return f"Error creating {file_path}: {exc}"
+
+    @classmethod
+    def _resolve_file_path(cls, coder, file_path):
+        """Turn a user-supplied path into an absolute path, expanding command_key:: aliases.
+
+        ``command_key::{command_key}/{filename}`` is mapped through
+        ``coder.local_agent_folder`` before being made absolute. An alias
+        without a ``/`` and any ordinary path go straight to
+        ``coder.abs_root_path``.
+        """
+        prefix = "command_key::"
+        if file_path.startswith(prefix):
+            command_key, slash, filename = file_path[len(prefix) :].partition("/")
+            if slash:
+                agent_rel_path = coder.local_agent_folder(f"{command_key}/{filename}")
+                return coder.abs_root_path(agent_rel_path)
+        return coder.abs_root_path(file_path)
+
+    @classmethod
+    def _load_skill(cls, coder, skill_name):
+        """Load ``skill_name`` through the coder's skills manager.
+
+        Refuses when the "skills" context block is not active or when the
+        coder has no skills manager. Otherwise returns the manager's result,
+        or an error string if loading raises.
+        """
+        skills_enabled = cls._is_context_block_active(coder, "skills")
+        if not skills_enabled:
+            coder.io.tool_output(
+                f"⚠ Skills context block is not enabled. Skill '{skill_name}' cannot be loaded.",
+                type="tool-result",
+            )
+            return f"Skills context block not enabled: {skill_name}"
+
+        try:
+            manager = getattr(coder, "skills_manager", None)
+            if manager is None:
+                coder.io.tool_output(
+                    f"⚠ Skills manager not initialized. Skill '{skill_name}' not loaded.",
+                    type="tool-result",
+                )
+                return f"Skills manager not initialized: {skill_name}"
+            return manager.load_skill(skill_name)
+        except Exception as exc:
+            coder.io.tool_error(f"Error loading skill '{skill_name}': {str(exc)}")
+            return f"Error loading skill {skill_name}: {exc}"
+
+    @classmethod
+    def _remove_skill(cls, coder, skill_name):
+        """Remove ``skill_name`` through the coder's skills manager.
+
+        Refuses when the "skills" context block is not active or when the
+        coder has no skills manager. Otherwise returns the manager's result,
+        or an error string if removal raises.
+        """
+        skills_enabled = cls._is_context_block_active(coder, "skills")
+        if not skills_enabled:
+            coder.io.tool_output(
+                f"⚠ Skills context block is not enabled. Skill '{skill_name}' cannot be removed.",
+                type="tool-result",
+            )
+            return f"Skills context block not enabled: {skill_name}"
+
+        try:
+            manager = getattr(coder, "skills_manager", None)
+            if manager is None:
+                coder.io.tool_output(
+                    f"⚠ Skills manager not initialized. Skill '{skill_name}' not removed.",
+                    type="tool-result",
+                )
+                return f"Skills manager not initialized: {skill_name}"
+            return manager.remove_skill(skill_name)
+        except Exception as exc:
+            coder.io.tool_error(f"Error removing skill '{skill_name}': {str(exc)}")
+            return f"Error removing skill {skill_name}: {exc}"
+
+    @classmethod
+    async def _load_mcp(cls, coder, server_name):
+        """Connect an MCP server by name and register it for this coder.
+
+        Refuses when the "servers" context block is not active. After a
+        successful connect the server is force-included for ``coder`` and
+        (without force) excluded for every other coder yielded by
+        ``iter_all_coders``. The connect call runs through
+        ``coder.coroutines.interruptible`` so it can be interrupted.
+
+        Returns a status message; exceptions become an error string.
+        """
+        if not cls._is_context_block_active(coder, "servers"):
+            coder.io.tool_output(
+                f"⚠ Servers context block is not enabled. Server '{server_name}' cannot be loaded.",
+                type="tool-result",
+            )
+            return f"Servers context block not enabled: {server_name}"
+
+        try:
+            manager = coder.mcp_manager
+            if not manager or not manager.servers:
+                return "No MCP servers found, nothing to load."
+
+            server = manager.get_server(server_name)
+            if server is None:
+                return f"MCP server {server_name} does not exist."
+
+            connected = manager.connected_servers
+            if isinstance(connected, dict):
+                connected_names = set(connected.keys())
+            else:
+                # Tolerate entries that are plain names rather than server objects.
+                connected_names = {getattr(s, "name", s) for s in connected}
+            if server.name in connected_names:
+                return f"Server already loaded: {server_name}"
+
+            # Clear any stale interrupt before starting the connect.
+            coder.interrupt_event.clear()
+            did_connect, interrupted = await coder.coroutines.interruptible(
+                manager.connect_server(server_name),
+                coder.interrupt_event,
+            )
+
+            if interrupted:
+                return f"Interrupted: {server_name}"
+            if not did_connect:
+                return f"Unable to load server: {server_name}"
+
+            update_server_registration(coder, server_name, "include", force=True)
+            for other_coder in iter_all_coders(coder):
+                if other_coder is coder:
+                    continue
+                update_server_registration(other_coder, server_name, "exclude", force=False)
+            return f"Loaded server: {server_name}"
+        except Exception as e:
+            coder.io.tool_error(f"Error loading MCP server '{server_name}': {str(e)}")
+            return f"Error loading MCP server {server_name}: {e}"
+
+    @classmethod
+    async def _remove_mcp(cls, coder, server_name):
+        """Exclude an MCP server for this coder and disconnect it when no coder still uses it.
+
+        Refuses when the "servers" context block is not active, for the
+        "Local" server, for unknown servers and for servers that are not
+        connected. The server is force-excluded for ``coder``; the actual
+        disconnect only happens when ``is_server_globally_excluded`` reports
+        true, and runs through ``coder.coroutines.interruptible``.
+
+        Returns a status message; exceptions become an error string.
+        """
+        if not cls._is_context_block_active(coder, "servers"):
+            coder.io.tool_output(
+                f"⚠ Servers context block is not enabled. Server '{server_name}' cannot be removed.",
+                type="tool-result",
+            )
+            return f"Servers context block not enabled: {server_name}"
+
+        try:
+            manager = coder.mcp_manager
+            if not manager or not manager.servers:
+                return "No MCP servers are configured."
+
+            if server_name == "Local":
+                return "Cannot remove 'Local' server"
+
+            server = manager.get_server(server_name)
+            if not server:
+                return f"MCP server {server_name} does not exist."
+
+            connected = manager.connected_servers
+            if isinstance(connected, dict):
+                connected_names = set(connected.keys())
+            else:
+                # Tolerate entries that are plain names rather than server objects.
+                connected_names = {getattr(s, "name", s) for s in connected}
+            if server.name not in connected_names:
+                return f"Server {server_name} is not currently connected."
+
+            update_server_registration(coder, server_name, "exclude", force=True)
+
+            if not is_server_globally_excluded(coder, server_name):
+                return f"Removed from active coder, still active for others: {server_name}"
+
+            # Clear any stale interrupt before starting the disconnect.
+            coder.interrupt_event.clear()
+            did_disconnect, interrupted = await coder.coroutines.interruptible(
+                manager.disconnect_server(server_name),
+                coder.interrupt_event,
+            )
+            if interrupted:
+                return f"Interrupted: {server_name}"
+            if did_disconnect:
+                return f"Removed server: {server_name}"
+            return f"Unable to remove server: {server_name}"
+        except Exception as e:
+            coder.io.tool_error(f"Error removing MCP server '{server_name}': {str(e)}")
+            return f"Error removing MCP server {server_name}: {e}"
+
+    @classmethod
+    async def _list_mcp_servers(cls, coder):
+        """List connected MCP servers and the configured servers that are not connected.
+
+        Returns a two-section text report, or a fixed message when the coder
+        has no MCP manager.
+        """
+        if not coder.mcp_manager:
+            return "MCP manager is not configured."
+
+        all_servers = coder.mcp_manager.servers or []
+        connected_servers = coder.mcp_manager.connected_servers
+
+        # connected_servers is handled elsewhere in this class both as a dict
+        # keyed by name and as an iterable; iterating a dict yields its string
+        # keys, so never assume every entry has a .name attribute.
+        if isinstance(connected_servers, dict):
+            loaded_server_names = set(connected_servers.keys())
+        else:
+            loaded_server_names = {getattr(s, "name", s) for s in connected_servers}
+
+        configured_names = sorted(
+            server.name for server in all_servers if server.name not in loaded_server_names
+        )
+
+        result = []
+        if loaded_server_names:
+            result.append("Loaded MCP Servers:")
+            result.extend(f"- {name}" for name in sorted(loaded_server_names))
+        else:
+            result.append("No MCP servers are currently loaded.")
+
+        result.append("")
+
+        if configured_names:
+            result.append("Configured MCP Servers:")
+            result.extend(f"- {name}" for name in configured_names)
+        else:
+            result.append("No other MCP servers are configured.")
+
+        return "\n".join(result)
+
+    @classmethod
+    def _is_context_block_active(cls, coder, block_name):
+        """Report whether ``block_name`` is included and not excluded in the coder's agent config.
+
+        A coder without an ``agent_config`` attribute is treated as having an
+        empty config, so every block is inactive.
+        """
+        config = getattr(coder, "agent_config", {})
+        included = config.get("include_context_blocks", set())
+        excluded = config.get("exclude_context_blocks", set())
+        if block_name not in included:
+            return False
+        return block_name not in excluded
+
+    @classmethod
+    def _natural_sort_key(cls, s: str) -> list:
+        """Natural sort key: "a10b2" becomes ["a", 10, "b", 2, ""].
+
+        ``re.split`` with a capturing group alternates text and digit runs, so
+        odd positions are exactly the ``\\d+`` matches. Converting by position
+        (instead of ``str.isdigit``) avoids a ValueError on characters such as
+        "²" that ``isdigit`` accepts but ``int`` rejects, and keeps every
+        position the same type across keys so they stay comparable.
+        """
+        chunks = re.split(r"(\d+)", s)
+        return [int(chunk) if i % 2 else chunk.lower() for i, chunk in enumerate(chunks)]
diff --git a/cecli/tools/utils/__init__.py b/cecli/tools/utils/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/cecli/tools/utils/helpers.py b/cecli/tools/utils/helpers.py
index e97bc28e204..71ed132beb3 100644
--- a/cecli/tools/utils/helpers.py
+++ b/cecli/tools/utils/helpers.py
@@ -56,12 +56,12 @@ def validate_file_for_edit(coder, file_path):
if abs_path not in coder.abs_fnames:
if abs_path in coder.abs_read_only_fnames:
raise ToolError(
- f"File '{file_path}' is read-only. Make editable with `ContextManager` first."
+ f"File '{file_path}' is read-only. Make editable with `ResourceManager` first."
)
# else:
# # File exists but is not in context at all
# raise ToolError(
- # f"File '{file_path}' not in context. Make editable with `ContextManager` first."
+ # f"File '{file_path}' not in context. Make editable with `ResourceManager` first."
# )
# Reread content immediately before potential modification
diff --git a/cecli/tools/utils/registry.py b/cecli/tools/utils/registry.py
index fe0bffc8081..f582a617f75 100644
--- a/cecli/tools/utils/registry.py
+++ b/cecli/tools/utils/registry.py
@@ -19,7 +19,7 @@ class ToolRegistry:
"""Registry for tool discovery and management."""
_tools: Dict[str, Type] = {} # normalized name -> Tool class
- _essential_tools: Set[str] = {"contextmanager", "edittext", "yield"}
+ _essential_tools: Set[str] = {"resourcemanager", "edittext", "yield"}
_registry: Dict[str, Type] = {} # cached filtered registry
loaded_custom_tools: List[str] = []
diff --git a/cecli/tui/app.py b/cecli/tui/app.py
index 1a2d3822792..eaa3ab8f40a 100644
--- a/cecli/tui/app.py
+++ b/cecli/tui/app.py
@@ -750,7 +750,6 @@ def update_spinner(self, msg, agent_name: str | None = None):
def show_error(self, message, agent_name: str | None = None):
"""Show an error message in the status bar."""
status_bar = self.query_one("#status-bar", StatusBar)
-
status_bar.show_notification(message, severity="error", timeout=5, agent_name=agent_name)
def on_resize(self) -> None:
diff --git a/cecli/watch.py b/cecli/watch.py
index f2c77ccbf96..e4c508cd8a9 100644
--- a/cecli/watch.py
+++ b/cecli/watch.py
@@ -7,7 +7,6 @@
from pathspec.patterns import GitWildMatchPattern
from cecli.dump import dump # noqa
-from cecli.helpers.grep_ast import TreeContext
from cecli.watch_prompts import watch_ask_prompt, watch_code_prompt
@@ -185,6 +184,7 @@ def stop(self):
def process_changes(self):
"""Get any detected file changes"""
+ from cecli.helpers.grep_ast import TreeContext
has_action = None
added = False
diff --git a/cecli/website/_includes/works-best.md b/cecli/website/_includes/works-best.md
index 73a18008872..0d54f78495b 100644
--- a/cecli/website/_includes/works-best.md
+++ b/cecli/website/_includes/works-best.md
@@ -1 +1 @@
-cecli works best with Claude 3.5 Sonnet, DeepSeek R1 & Chat V3, OpenAI o1, o3-mini & GPT-4o. cecli can [connect to almost any LLM, including local models](https://cecli.chat/docs/llms.html).
+cecli can [connect to almost any LLM, including local models](https://cecli.chat/docs/llms.html).
diff --git a/cecli/website/_sass/global-overrides.scss b/cecli/website/_sass/global-overrides.scss
index aa6d800a412..114a01bf357 100644
--- a/cecli/website/_sass/global-overrides.scss
+++ b/cecli/website/_sass/global-overrides.scss
@@ -17,7 +17,7 @@
// Grid layout for documentation pages on large screens
display: grid !important;
grid-template-columns: 300px 1fr; // Sidebar on left, main content on right
- grid-template-rows: auto 1fr; // Top nav bar, then main content
+ grid-template-rows: 100vh; // Top nav bar, then main content
grid-template-areas:
"sidebar topnav"
"sidebar main";
diff --git a/cecli/website/assets/home.css b/cecli/website/assets/home.css
index a566f660836..b2dd68d5d70 100644
--- a/cecli/website/assets/home.css
+++ b/cecli/website/assets/home.css
@@ -289,6 +289,124 @@ nav {
letter-spacing: -0.5px;
}
+/* ── Install Section ── */
+.install {
+ padding: 60px 0 40px;
+ text-align: center;
+}
+
+.install .code-block {
+ margin: 0 auto;
+ position: relative;
+}
+
+/* ── Install Grid (side-by-side cards) ── */
+.install-grid {
+ display: grid;
+ grid-template-columns: 1fr 1fr;
+ gap: 24px;
+ max-width: 800px;
+ margin: 0 auto;
+}
+
+.install-card {
+ background: #1a1a1a;
+ border-radius: 8px;
+ padding: 20px;
+ border-left: 3px solid var(--primary);
+}
+
+.install-card-header {
+ display: flex;
+ align-items: center;
+ gap: 8px;
+ margin-bottom: 12px;
+ font-size: 0.95rem;
+ font-weight: 600;
+ color: var(--dark);
+}
+
+.install-card-header i {
+ font-size: 1.3rem;
+ color: var(--primary);
+}
+
+.install-card .code-block {
+ margin: 0;
+ padding: 0.75rem 1rem;
+ font-size: 0.85rem;
+}
+
+.install-card .code-block pre {
+ margin: 0;
+}
+
+/* ── Copy Button (clickable <pre>) ── */
+.copy-btn {
+ cursor: pointer !important;
+ user-select: none;
+ transition: background-color 0.3s, border-color 0.3s, transform 0.2s;
+ border: 1px solid transparent;
+ display: flex;
+ align-items: center;
+ gap: 8px;
+ flex: 1;
+ margin: 0;
+}
+
+.copy-btn:hover {
+ background-color: #2a2a2a !important;
+ border-color: var(--primary);
+ transform: translateY(-1px);
+}
+
+.copy-btn:active {
+ transform: translateY(0px);
+}
+
+.copy-btn:focus-visible {
+ outline: 2px solid var(--primary);
+ outline-offset: 2px;
+}
+
+.copy-btn code {
+ flex: 1;
+}
+
+.copy-btn i {
+ font-size: 1.2rem;
+ color: var(--gray);
+ transition: color 0.3s;
+ flex-shrink: 0;
+}
+
+.copy-btn:hover i {
+ color: white;
+}
+
+/* ── Install Note ── */
+.install-note {
+ margin-top: 1.5rem;
+ color: var(--gray);
+ font-size: 0.95rem;
+}
+
+.install-note a {
+ color: var(--primary);
+ text-decoration: none;
+ font-weight: 500;
+}
+
+.install-note a:hover {
+ text-decoration: underline;
+}
+
+@media (max-width: 768px) {
+ .install-grid {
+ grid-template-columns: 1fr;
+ }
+}
+
.feature-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
@@ -391,11 +509,11 @@ code, pre, .code-block {
.code-block {
background-color: var(--code-bg);
border-radius: 8px;
- padding: 1.5rem;
+ padding: 1rem;
color: white;
font-size: 1.1rem;
line-height: 1.5;
- margin: 1.5rem 0;
+ margin: 1rem 0;
overflow-x: auto;
box-shadow: 0 4px 16px rgba(0, 0, 0, 0.1);
tab-size: 2;
diff --git a/cecli/website/docs/config.md b/cecli/website/docs/config.md
index 55b983ffa47..1412b27f815 100644
--- a/cecli/website/docs/config.md
+++ b/cecli/website/docs/config.md
@@ -1,5 +1,5 @@
---
-nav_order: 55
+nav_order: 40
has_children: true
description: Information on all of cecli's settings and how to use them.
---
diff --git a/cecli/website/docs/config/agent-mode.md b/cecli/website/docs/config/agent-mode.md
index d66ac7c14e7..f0d7e01fa02 100644
--- a/cecli/website/docs/config/agent-mode.md
+++ b/cecli/website/docs/config/agent-mode.md
@@ -48,10 +48,9 @@ Agent Mode uses a centralized local tool registry that manages all available too
- **File Discovery Tools**: `ExploreCode`, `Ls`, `Grep`
- **Editing Tools**: `EditText`,
-- **Context Management Tools**: `ContextManager`, `GetLines`
+- **Context Management Tools**: `ResourceManager`, `GetLines`
- **Git Tools**: `GitDiff`, `GitLog`, `GitShow`, `GitStatus`
- **Utility Tools**: `UpdateTodoList`, `UndoChange`, `Yield`
-- **Skill Management**: `LoadSkill`, `RemoveSkill`
- **Sub-Agent Tools**: `Delegate` - Delegate sub-tasks to specialized sub-agents
#### Enhanced Context Management
@@ -168,7 +167,7 @@ Agent Mode can also be configured directly in your configuration file. See the [
Certain tools are always available regardless of includelist/excludelist settings:
-- `ContextManager` - Add, drop, and make files editable in the context
+- `ResourceManager` - Add, drop, and make files editable in the context
- `edittext` - Basic text replacement
- `finished` - Complete the task
@@ -263,7 +262,7 @@ agent: true
# Agent Mode configuration
agent-config:
# Tool configuration
- tools_includelist: ["contextmanager", "edittext", "finished"] # Optional: Whitelist of tools
+ tools_includelist: ["resourcemanager", "edittext", "finished"] # Optional: Whitelist of tools
tools_excludelist: ["command", "commandinteractive"] # Optional: Blacklist of tools
tools_paths: ["./custom-tools", "~/my-tools"] # Optional: Directories or files containing custom tools
@@ -310,7 +309,6 @@ agent-config:
For complete documentation on creating and using skills, including skill directory structure, SKILL.md format, and best practices, see the [Skills documentation](https://github.com/dwash96/cecli/blob/main/cecli/website/docs/config/skills.md).
### Benefits
-
- **Autonomous operation**: Reduces need for manual file management
- **Context awareness**: Real-time project information improves decision making
- **Precision editing**: Granular tools reduce errors compared to SEARCH/REPLACE
diff --git a/cecli/website/docs/config/custom-system-prompts.md b/cecli/website/docs/config/custom-system-prompts.md
index 0fb2082caa1..e93651f23e1 100644
--- a/cecli/website/docs/config/custom-system-prompts.md
+++ b/cecli/website/docs/config/custom-system-prompts.md
@@ -83,7 +83,7 @@ main_system: |
## Core Directives
- **Role**: Act as an expert software engineer.
- - **Act Proactively**: Autonomously use file discovery and context management tools (`ViewFilesAtGlob`, `ViewFilesMatching`, `Ls`, `ContextManager`) to gather information and fulfill the user's request. Chain tool calls across multiple turns to continue exploration.
+ - **Act Proactively**: Autonomously use file discovery and context management tools (`ViewFilesAtGlob`, `ViewFilesMatching`, `Ls`, `ResourceManager`) to gather information and fulfill the user's request. Chain tool calls across multiple turns to continue exploration.
- **Be Decisive**: Trust that your initial findings are valid. Refrain from asking the same question or searching for the same term in multiple similar ways.
- **Be Concise**: Keep all responses brief and direct (1-3 sentences). Avoid preamble, postamble, and unnecessary explanations. Do not repeat yourself.
- **Be Careful**: Break updates down into smaller, more manageable chunks. Focus on one thing at a time.
diff --git a/cecli/website/docs/config/skills.md b/cecli/website/docs/config/skills.md
index a53286097da..148e3bae0d4 100644
--- a/cecli/website/docs/config/skills.md
+++ b/cecli/website/docs/config/skills.md
@@ -3,6 +3,7 @@ parent: Configuration
nav_order: 35
description: Extend AI capabilities with custom instructions, reference materials, scripts, and assets through the skills system.
---
+
# Skills System
Agent Mode includes a powerful skills system that allows you to extend the AI's capabilities with custom instructions, reference materials, scripts, and assets. Skills are organized collections of knowledge and tools that help the AI perform specific tasks more effectively.
diff --git a/cecli/website/docs/faq.md b/cecli/website/docs/faq.md
deleted file mode 100644
index 71590935d44..00000000000
--- a/cecli/website/docs/faq.md
+++ /dev/null
@@ -1,379 +0,0 @@
----
-nav_order: 90
-description: Frequently asked questions about cecli.
----
-
-# FAQ
-{: .no_toc }
-
-- TOC
-{:toc}
-
-{% include help-tip.md %}
-
-## How can I add ALL the files to the chat?
-
-People regularly ask about how to add **many or all of their repo's files** to the chat.
-This is probably not a good idea and will likely do more harm than good.
-
-The best approach is think about which files need to be changed to accomplish
-the task you are working on. Just add those files to the chat.
-
-Usually when people want to add "all the files" it's because they think it
-will give the LLM helpful context about the overall code base.
-cecli will automatically give the LLM a bunch of additional context about
-the rest of your git repo.
-It does this by analyzing your entire codebase in light of the
-current chat to build a compact
-[repository map](https://cecli.dev/2023/10/22/repomap.html).
-
-Adding a bunch of files that are mostly irrelevant to the
-task at hand will often distract or confuse the LLM.
-The LLM will give worse coding results, and sometimese even fail to correctly edit files.
-Addings extra files will also increase your token costs.
-
-Again, it's usually best to just add the files to the chat that will need to be modified.
-If you still wish to add lots of files to the chat, you can:
-
-- Use a wildcard when you launch cecli: `cecli src/*.py`
-- Use a wildcard with the in-chat `/add` command: `/add src/*.py`
-- Give the `/add` command a directory name and it will recursively add every file under that dir: `/add src`
-
-## Can I use cecli in a large (mono) repo?
-
-cecli will work in any size repo, but is not optimized for quick
-performance and response time in very large repos.
-There are some things you can do to improve performance.
-
-Be sure to check the
-[general usage tips](/docs/usage/tips.html)
-before considering this large-repo specific advice.
-To get the best results from cecli you want to
-be thoughtful about how you add files to the chat,
-regardless of your repo size.
-
-You can change into a sub directory of your repo that contains the
-code you want to work on and use the `--subtree-only` switch.
-This will tell cecli to ignore the repo outside of the
-directory you start in.
-
-You can also create a `.cecli.ignore` file to tell cecli
-to ignore parts of the repo that aren't relevant to your task.
-This file conforms to `.gitignore` syntax and conventions.
-For example, to focus only on specific directories in a monorepo,
-you could create a `.cecli.ignore` file with:
-
-```
-# Ignore everything
-/*
-
-# Allow specific directories and their contents
-!foo/
-!bar/
-!baz/
-
-# Allow nested files under these directories
-!foo/**
-!bar/**
-!baz/**
-```
-
-You can use `--cecli-ignore ` to name a specific file
-to use for ignore patterns.
-You might have a few of these handy for when you want to work on
-frontend, backend, etc portions of your repo.
-
-## Can I use cecli with multiple git repos at once?
-
-Currently cecli can only work with one repo at a time.
-
-There are some things you can try if you need to work with
-multiple interrelated repos:
-
-- You can run cecli in repo-A where you need to make a change
-and use `/read` to add some files read-only from another repo-B.
-This can let cecli see key functions or docs from the other repo.
-- You can run `cecli --show-repo-map > map.md` within each
-repo to create repo maps.
-You could then run cecli in repo-A and
-use `/read ../path/to/repo-B/map.md` to share
-a high level map of the other repo.
-- You can use cecli to write documentation about a repo.
-Inside each repo, you could run `cecli docs.md`
-and work with cecli to write some markdown docs.
-Then while using cecli to edit repo-A
-you can `/read ../path/to/repo-B/docs.md` to
-read in those docs from the other repo.
-- In repo A, ask cecli to write a small script that demonstrates
-the functionality you want to use in repo B.
-Then when you're using cecli in repo B, you can
-`/read` in that script.
-
-## How do I turn on the repository map?
-
-Depending on the LLM you are using, cecli may launch with the repo map disabled by default:
-
-```
-Repo-map: disabled
-```
-
-This is because weaker models get easily overwhelmed and confused by the content of the
-repo map. They sometimes mistakenly try to edit the code in the repo map.
-The repo map is usually disabled for a good reason.
-
-If you would like to force it on, you can run cecli with `--map-tokens 1024`.
-
-## How do I include the git history in the context?
-
-When starting a fresh cecli session, you can include recent git history in the chat context. This can be useful for providing the LLM with information about recent changes. To do this:
-
-1. Use the `/run` command with `git diff` to show recent changes:
- ```
- /run git diff HEAD~1
- ```
- This will include the diff of the last commit in the chat history.
-
-2. To include diffs from multiple commits, increase the number after the tilde:
- ```
- /run git diff HEAD~3
- ```
- This will show changes from the last three commits.
-
-Remember, the chat history already includes recent changes made during the current session, so this tip is most useful when starting a new cecli session and you want to provide context about recent work.
-
-You can also use cecli to review PR branches:
-
-```
-/run git diff one-branch..another-branch
-
-...
-
-Add 6.9k tokens of command output to the chat? (Y)es/(N)o [Yes]: Yes
-
-/ask Are there any problems with the way this change works with the FooBar class?
-```
-
-And of course you can prepare diff output outside of cecli and provide it as
-a file for cecli to read:
-
-```
-$ git diff -C10 v1..v2 > v1-v2-changes.diff
-$ cecli --read v1-v2-changes.diff
-
-cecli v0.77.2.dev+import
-Main model: anthropic/claude-3-7-sonnet-20250219 with diff edit format, 8k think tokens
-──────────────────────────────────
-v1-v2-changes.diff
-> Do you see any potential bugs in this PR?
-```
-
-
-{: .tip }
-The `/git` command will not work for this purpose, as its output is not included in the chat.
-
-## How can I run cecli locally from source code?
-
-To run the project locally, follow these steps:
-
-```
-# Clone the repository
-git clone git@github.com:cecli-AI/cecli.git
-
-# Navigate to the project directory
-cd cecli
-
-# It's recommended to make a virtual environment
-
-# Install cecli in editable/development mode,
-# so it runs from the latest copy of these source files
-python -m pip install -e .
-
-# Run the local version of cecli
-python -m cecli
-```
-
-
-
-## Can I change the system prompts that cecli uses?
-
-The most convenient way to add custom instructions is to use a
-[conventions file](https://cecli.dev/docs/usage/conventions.html).
-
-But, cecli is set up to support different actual system prompts and edit formats
-in a modular way. If you look in the `cecli/coders` subdirectory, you'll
-see there's a base coder with base prompts, and then there are
-a number of
-different specific coder implementations.
-
-If you're thinking about experimenting with system prompts
-this document about
-[benchmarking GPT-3.5 and GPT-4 on code editing](https://cecli.dev/docs/benchmarks.html)
-might be useful background.
-
-While it's not well documented how to add new coder subsystems, you may be able
-to modify an existing implementation or use it as a template to add another.
-
-To get started, try looking at and modifying these files.
-
-The wholefile coder is currently used by GPT-3.5 by default. You can manually select it with `--edit-format whole`.
-
-- wholefile_coder.py
-- wholefile_prompts.py
-
-The editblock coder is currently used by GPT-4o by default. You can manually select it with `--edit-format diff`.
-
-- editblock_coder.py
-- editblock_prompts.py
-
-The universal diff coder is currently used by GPT-4 Turbo by default. You can manually select it with `--edit-format udiff`.
-
-- udiff_coder.py
-- udiff_prompts.py
-
-When experimenting with coder backends, it helps to run cecli with `--verbose --no-pretty` so you can see
-all the raw information being sent to/from the LLM in the conversation.
-
-You can also refer to the
-[instructions for installing a development version of cecli](https://cecli.dev/docs/install/optional.html#install-the-development-version-of-cecli).
-
-## What LLMs do you use to build cecli?
-
-cecli writes a lot of its own code, usually about 70% of the new code in each
-release.
-People often ask which LLMs I use with cecli, when writing cecli.
-Below is a table showing the models I have used recently,
-extracted from the
-[public log](https://github.com/dwash96/cecli/blob/main/cecli/website/assets/sample-analytics.jsonl)
-of my
-[cecli analytics](https://cecli.dev/docs/more/analytics.html).
-
-
-
-
-
Model Name
Total Tokens
Percent
-
gemini/gemini-2.5-pro
222,047
29.7%
-
gpt-5
211,072
28.2%
-
None
168,988
22.6%
-
gemini/gemini-3-pro-preview
81,851
11.0%
-
o3-pro
36,620
4.9%
-
gemini/gemini-2.5-flash-lite
15,470
2.1%
-
gemini/gemini-2.5-flash-lite-preview-06-17
11,371
1.5%
-
-
-
-## How are the "cecli wrote xx% of code" stats computed?
-
-[cecli is tightly integrated with git](/docs/git.html) so all
-of cecli's code changes are committed to the repo with proper attribution.
-The
-[stats are computed](https://github.com/dwash96/cecli/blob/main/scripts/blame.py)
-by doing something like `git blame` on the repo,
-and counting up who wrote all the new lines of code in each release.
-Only lines in source code files are counted, not documentation or prompt files.
-
-## Why did cecli ignore/discard its proposed edits after it asked to add a new file to the chat?
-
-If cecli prompts you to add a new file to the chat and you say yes,
-it will re-submit the original request.
-The fact that the LLM's reply indicated that it needed to see another file (and you said yes)
-is often a sign that the LLM should have been able to see/edit that file in the first place.
-Without access to it, there is increased chance that it's done a bad implementation of the requested change.
-Often LLMs will hallucinate content for the files they needed but didn't have.
-So cecli re-submits the original request in this situation.
-
-## Why does cecli sometimes stop highlighting code in its replies?
-
-cecli displays the markdown responses that are coming back from the LLM.
-Usually, the LLM will reply with code in a markdown "code block" with
-triple backtick fences, like this:
-
-````
-Here's some code:
-
-```
-print("hello")
-```
-````
-
-But if you've added files to the chat that contain triple backticks,
-cecli needs to tell the LLM to use a different set of fences.
-Otherwise, the LLM can't safely include your code's triple backticks
-inside the code blocks that it returns with edits.
-cecli will use fences like `...` in this case.
-
-A side effect of this is that the code that cecli outputs may no
-longer be properly highlighted.
-You will most often notice this if you add markdown files
-to you chats that contain code blocks.
-
-## Why is the LLM speaking to me in an unexpected language?
-
-cecli goes to some effort to prompt the model to use the language that is configured
-for your system.
-But LLMs aren't fully reliable, and they sometimes decide to speak in
-an unexpected language.
-Claude is especially fond of speaking French.
-
-You can explicitly set the language that cecli tells the model to use with
-`--chat-language `.
-But the LLM may not comply.
-
-## Can I share my cecli chat transcript?
-
-Yes, you can now share cecli chat logs in a pretty way.
-
-1. Copy the markdown logs you want to share from `.cecli.chat.history.md` and make a github gist. Or publish the raw markdown logs on the web any way you'd like.
-
- ```
- https://gist.github.com/cecli-AI/2087ab8b64034a078c0a209440ac8be0
- ```
-
-2. Take the gist URL and append it to:
-
- ```
- https://cecli.dev/share/?mdurl=
- ```
-
-This will give you a URL like this, which shows the chat history like you'd see in a terminal:
-
-```
-https://cecli.dev/share/?mdurl=https://gist.github.com/cecli-AI/2087ab8b64034a078c0a209440ac8be0
-```
-
-## Can I edit files myself while cecli is running?
-
-Yes. cecli always reads the latest copy of files from the file
-system when you send each message.
-
-While you're waiting for cecli's reply to complete, it's probably unwise to
-edit files that you've added to the chat.
-Your edits and cecli's edits might conflict.
-
-## What is cecli AI LLC?
-
-cecli AI LLC is the company behind the cecli AI coding tool.
-cecli is
-[open source and available on GitHub](https://github.com/dwash96/cecli)
-under an
-[Apache 2.0 license](https://github.com/dwash96/cecli/blob/main/LICENSE.txt).
-
-## Can I Script cecli?
-
-Yes. You can script cecli via the command line or python. See more from here: [Scripting cecli](https://cecli.dev/docs/scripting.html)
-
-
-
-
diff --git a/cecli/website/docs/install.md b/cecli/website/docs/install.md
index 00656d3a351..0591eb9d36a 100644
--- a/cecli/website/docs/install.md
+++ b/cecli/website/docs/install.md
@@ -8,24 +8,6 @@ description: How to install and get started pair programming with cecli.
# Installation
{: .no_toc }
-
-## Get started quickly with uv
-
-We recommend using [uv](https://docs.astral.sh/uv/) for installing cecli as it provides the best experience and isolates cecli from your development environment.
-
-```bash
-uv tool install --native-tls --python python3.12 cecli-dev
-```
-
-This will install cecli in its own separate python environment.
-If needed,
-uv will also install a separate version of python 3.12 to use with cecli (cecli supports Python 3.10-3.14).
-
-Once cecli is installed,
-there are also some [optional install steps](/docs/install/optional.html).
-
-See the [usage instructions](https://cecli.dev/docs/usage.html) to start coding with cecli.
-
## One-liners
These one-liners will install cecli, along with python 3.12 if needed (cecli supports Python 3.10-3.14).
@@ -52,7 +34,6 @@ wget -qO- https://cecli.dev/install.sh | sh
powershell -ExecutionPolicy ByPass -c "irm https://cecli.dev/install.ps1 | iex"
```
-
## Install with uv
You can install cecli with uv:
@@ -106,17 +87,43 @@ or
uv pip install --native-tls cecli-dev
```
-{% include python-m-aider.md %}
+## Basic Configuration
+
+We highly recommend using a `.cecli.conf.yml` file in your project directories. A good place to get started is:
+
+```yaml
+model:
+agent: true
+auto-commits: true
+auto-save: true
+cache-prompts: true
+check-update: true
+enable-context-compaction: true
+context-compaction-max-tokens: 0.8
+show-model-warnings: true
+
+agent-config:
+ large_file_token_threshold: 8192
+ skip_cli_confirmations: false
+
+mcp-servers:
+ mcpServers:
+ context7:
+ transport: http
+ url: https://mcp.context7.com/mcp
+```
-#### Installing with package managers
+### Run Program
+
+If you are in the directory with your .cecli.conf.yml file, then simply running `cecli` will start the agent with your configuration. For best results, since terminal emulators can be finicky, we highly suggest running:
+
+```bash
+cecli --terminal-setup
+```
-It's best to install cecli using one of methods
-recommended above.
-While cecli is available in a number of system package managers,
-they often install cecli with incorrect dependencies.
+Run this on first use to configure keybindings for the program (notably `shift+enter`). Terminal support is ongoing, so feel free to open a GitHub issue or chat in the Discord so we can figure out what's needed to automatically set up a given terminal.
## Next steps...
-There are some [optional install steps](/docs/install/optional.html) you could consider.
See the [usage instructions](https://cecli.dev/docs/usage.html) to start coding with cecli.
diff --git a/cecli/website/docs/llms.md b/cecli/website/docs/llms.md
index 0dbb4b23ab6..cdb508182ac 100644
--- a/cecli/website/docs/llms.md
+++ b/cecli/website/docs/llms.md
@@ -1,25 +1,19 @@
---
title: Connecting to LLMs
-nav_order: 40
+nav_order: 55
has_children: true
description: cecli can connect to most LLMs for AI pair programming.
---
-# cecli can connect to most LLMs
-{: .no_toc }
-
-[](https://cecli.dev/assets/llms.jpg)
-
-
-## Best models
+## Recommended models
{: .no_toc }
cecli works best with these models, which are skilled at editing code:
-- [Gemini 2.5 Pro](/docs/llms/gemini.html)
-- [DeepSeek R1 and V3](/docs/llms/deepseek.html)
-- [Claude 3.7 Sonnet](/docs/llms/anthropic.html)
-- [OpenAI o3, o4-mini and GPT-4.1](/docs/llms/openai.html)
+- [Gemini 3+](/docs/llms/gemini.html)
+- [DeepSeek V4+](/docs/llms/deepseek.html)
+- [Claude 4+](/docs/llms/anthropic.html)
+- [GPT 5+](/docs/llms/openai.html)
## Free models
@@ -28,12 +22,11 @@ cecli works best with these models, which are skilled at editing code:
cecli works with a number of **free** API providers:
- [OpenRouter offers free access to many models](https://openrouter.ai/models/?q=free), with limitations on daily usage.
-- Google's [Gemini 2.5 Pro Exp](/docs/llms/gemini.html) works very well with cecli.
## Local models
{: .no_toc }
-cecli can work also with local models, for example using [Ollama](/docs/llms/ollama.html).
+cecli can also work with local models, for example using [Ollama](/docs/llms/ollama.html).
It can also access
local models that provide an
[Open AI compatible API](/docs/llms/openai-compat.html).
@@ -41,14 +34,10 @@ local models that provide an
## Use a capable model
{: .no_toc }
-Check
-[cecli's LLM leaderboards](https://cecli.dev/docs/leaderboards/)
-to see which models work best with cecli.
-
Be aware that cecli may not work well with less capable models.
If you see the model returning code, but cecli isn't able to edit your files
and commit the changes...
this is usually because the model isn't capable of properly
returning "code edits".
-Models weaker than GPT 3.5 may have problems working well with cecli.
+Models weaker than GPT-4o may have problems working well with cecli.
diff --git a/cecli/website/docs/usage.md b/cecli/website/docs/usage.md
index eef6c97f0f4..549ad0c00dd 100644
--- a/cecli/website/docs/usage.md
+++ b/cecli/website/docs/usage.md
@@ -1,5 +1,5 @@
---
-nav_order: 30
+nav_order: 55
has_children: true
description: How to use cecli to pair program with AI and edit code in your local git repo.
---
diff --git a/cecli/website/docs/install/optional.md b/cecli/website/docs/usage/optional.md
similarity index 98%
rename from cecli/website/docs/install/optional.md
rename to cecli/website/docs/usage/optional.md
index 5833833423e..6aa97e574cd 100644
--- a/cecli/website/docs/install/optional.md
+++ b/cecli/website/docs/usage/optional.md
@@ -1,6 +1,6 @@
---
-parent: Installation
-nav_order: 20
+parent: Usage
+nav_order: 25
---
# Optional steps
diff --git a/cecli/website/docs/usage/tips.md b/cecli/website/docs/usage/tips.md
index 6e0fddd9e3b..b1440c9c22a 100644
--- a/cecli/website/docs/usage/tips.md
+++ b/cecli/website/docs/usage/tips.md
@@ -1,6 +1,6 @@
---
parent: Usage
-nav_order: 25
+nav_order: 20
description: Tips for AI pair programming with cecli.
---
diff --git a/cecli/website/index.html b/cecli/website/index.html
index 07f969985f8..00a92d0e563 100644
--- a/cecli/website/index.html
+++ b/cecli/website/index.html
@@ -37,15 +37,16 @@
-
Oh, to live in a terminal
+
An ode to the CLI
- Cecli wears many hats:
- A pair programmer, a researcher, a writer,
- a general automaton and most fundamentally, an assistant
+ Here, we spoke to the machine
+ Now, the machine speaks back
+ So we journey, together