{"id":779,"date":"2026-05-04T09:58:23","date_gmt":"2026-05-04T09:58:23","guid":{"rendered":"https:\/\/cyb3rjan.com\/?p=779"},"modified":"2026-05-04T14:24:07","modified_gmt":"2026-05-04T14:24:07","slug":"running-claude-code-fully-local-no-api-costs-no-cloud","status":"publish","type":"post","link":"https:\/\/cyb3rjan.com\/index.php\/2026\/05\/04\/running-claude-code-fully-local-no-api-costs-no-cloud\/","title":{"rendered":"Running Claude Code Fully Local \u2014 No API Costs, No Cloud"},"content":{"rendered":"\n<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"UTF-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n<title>Running Claude Code Fully Local \u2014 No API Costs, No Cloud<\/title>\n<style>\n  @import url('https:\/\/fonts.googleapis.com\/css2?family=Bitter:ital,wght@0,400;0,600;1,400&family=JetBrains+Mono:wght@400;500&family=DM+Sans:wght@400;500&display=swap');\n\n  *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }\n\n  :root {\n    --accent: #c45a1a;\n    --accent-light: rgba(196,90,26,0.12);\n    --code-bg: #1e1e1a;\n    --code-fg: #d4cfbe;\n    --code-comment: #7a7a6a;\n    --code-string: #a8c07a;\n    --code-key: #e0a070;\n    --border: rgba(128,128,128,0.25);\n  }\n\n  \/* body: NO background, NO color \u2014 let the theme handle it *\/\n  .blog-post-wrap {\n    font-family: 'DM Sans', sans-serif;\n    line-height: 1.7;\n    font-size: 17px;\n  }\n\n  \/* \u2500\u2500 Header \u2500\u2500 *\/\n  .hero {\n    background: #1a1a18;\n    color: #f7f5f0;\n    padding: 72px 24px 56px;\n    position: relative;\n    overflow: hidden;\n    margin-bottom: 0;\n  }\n  .hero::before {\n    content: '';\n    position: absolute;\n    inset: 0;\n    background: repeating-linear-gradient(\n      -45deg,\n      transparent,\n      transparent 40px,\n      rgba(255,255,255,0.015) 40px,\n      rgba(255,255,255,0.015) 41px\n    );\n  }\n  .hero-inner {\n    max-width: 760px;\n    margin: 0 auto;\n    position: 
relative;\n  }\n  .hero-tag {\n    display: inline-block;\n    font-family: 'JetBrains Mono', monospace;\n    font-size: 11px;\n    letter-spacing: 0.12em;\n    text-transform: uppercase;\n    color: var(--accent);\n    border: 1px solid var(--accent);\n    padding: 4px 10px;\n    border-radius: 3px;\n    margin-bottom: 24px;\n  }\n  .hero h1 {\n    font-family: 'Bitter', serif;\n    font-size: clamp(28px, 5vw, 46px);\n    font-weight: 600;\n    line-height: 1.2;\n    color: #f7f5f0;\n    margin-bottom: 20px;\n    max-width: 680px;\n  }\n  .hero-sub {\n    font-size: 18px;\n    color: rgba(247,245,240,0.65);\n    max-width: 580px;\n    line-height: 1.6;\n    margin-bottom: 32px;\n  }\n  .hero-meta {\n    display: flex;\n    align-items: center;\n    gap: 20px;\n    flex-wrap: wrap;\n  }\n  .hero-meta span {\n    font-family: 'JetBrains Mono', monospace;\n    font-size: 12px;\n    color: rgba(247,245,240,0.45);\n  }\n  .hero-meta span + span::before {\n    content: '\u00b7';\n    margin-right: 20px;\n  }\n\n  \/* \u2500\u2500 Layout \u2500\u2500 *\/\n  .blog-post-wrap .wrapper {\n    max-width: 760px;\n    margin: 0 auto;\n    padding: 0 24px 96px;\n  }\n\n  \/* \u2500\u2500 Prose \u2014 inherits theme color \u2500\u2500 *\/\n  .blog-post-wrap .prose { padding-top: 48px; }\n  .blog-post-wrap .prose p { margin-bottom: 1.5rem; }\n  .blog-post-wrap .prose h2 {\n    font-family: 'Bitter', serif;\n    font-size: 26px;\n    font-weight: 600;\n    margin: 3rem 0 1rem;\n    padding-bottom: 10px;\n    border-bottom: 2px solid var(--accent);\n    display: inline-block;\n  }\n  .blog-post-wrap .prose h3 {\n    font-family: 'DM Sans', sans-serif;\n    font-size: 17px;\n    font-weight: 500;\n    margin: 2rem 0 0.75rem;\n    letter-spacing: 0.01em;\n  }\n  .blog-post-wrap .prose ul,\n  .blog-post-wrap .prose ol {\n    padding-left: 1.5rem;\n    margin-bottom: 1.5rem;\n  }\n  .blog-post-wrap .prose li { margin-bottom: 0.5rem; }\n  .blog-post-wrap .prose strong { font-weight: 500; 
}\n  .blog-post-wrap .prose a { color: var(--accent); text-decoration: underline; text-underline-offset: 3px; }\n\n  \/* \u2500\u2500 Inline code \u2500\u2500 *\/\n  .blog-post-wrap code {\n    font-family: 'JetBrains Mono', monospace;\n    font-size: 13.5px;\n    background: rgba(128,128,128,0.15);\n    border: 1px solid var(--border);\n    padding: 1px 6px;\n    border-radius: 4px;\n    color: var(--accent);\n  }\n\n  \/* \u2500\u2500 Code blocks \u2500\u2500 *\/\n  .blog-post-wrap .code-block {\n    background: var(--code-bg);\n    border-radius: 8px;\n    margin: 1.75rem 0;\n    overflow: hidden;\n    border: 1px solid #2e2e28;\n  }\n  .blog-post-wrap .code-block-header {\n    display: flex;\n    align-items: center;\n    justify-content: space-between;\n    padding: 10px 16px;\n    background: #161612;\n    border-bottom: 1px solid #2e2e28;\n  }\n  .blog-post-wrap .code-block-lang {\n    font-family: 'JetBrains Mono', monospace;\n    font-size: 11px;\n    letter-spacing: 0.08em;\n    text-transform: uppercase;\n    color: #5a5a4a;\n  }\n  .blog-post-wrap .code-block pre {\n    padding: 20px;\n    overflow-x: auto;\n    font-family: 'JetBrains Mono', monospace;\n    font-size: 13px;\n    line-height: 1.7;\n    color: var(--code-fg);\n  }\n  .blog-post-wrap .code-block pre .c { color: var(--code-comment); font-style: italic; }\n  .blog-post-wrap .code-block pre .s { color: var(--code-string); }\n  .blog-post-wrap .code-block pre .k { color: var(--code-key); }\n  .blog-post-wrap .code-block pre .v { color: #8fc0d8; }\n\n  \/* \u2500\u2500 Callout boxes \u2500\u2500 *\/\n  .blog-post-wrap .callout {\n    border-left: 3px solid var(--accent);\n    background: var(--accent-light);\n    padding: 16px 20px;\n    border-radius: 0 6px 6px 0;\n    margin: 1.75rem 0;\n  }\n  .blog-post-wrap .callout-title {\n    font-family: 'JetBrains Mono', monospace;\n    font-size: 11px;\n    letter-spacing: 0.1em;\n    text-transform: uppercase;\n    color: var(--accent);\n    
margin-bottom: 6px;\n  }\n  .blog-post-wrap .callout p { margin: 0; font-size: 15px; }\n\n  .blog-post-wrap .callout.info {\n    border-color: #2a6090;\n    background: rgba(42,96,144,0.12);\n  }\n  .blog-post-wrap .callout.info .callout-title { color: #5a9fd4; }\n\n  \/* \u2500\u2500 Spec table \u2500\u2500 *\/\n  .blog-post-wrap .spec-table {\n    width: 100%;\n    border-collapse: collapse;\n    margin: 1.75rem 0;\n    font-size: 15px;\n  }\n  .blog-post-wrap .spec-table th {\n    font-family: 'JetBrains Mono', monospace;\n    font-size: 11px;\n    letter-spacing: 0.08em;\n    text-transform: uppercase;\n    opacity: 0.5;\n    padding: 8px 12px;\n    text-align: left;\n    border-bottom: 1px solid var(--border);\n  }\n  .blog-post-wrap .spec-table td {\n    padding: 10px 12px;\n    border-bottom: 1px solid var(--border);\n    vertical-align: top;\n  }\n  .blog-post-wrap .spec-table tr:last-child td { border-bottom: none; }\n  .blog-post-wrap .spec-table td:first-child {\n    font-family: 'JetBrains Mono', monospace;\n    font-size: 13px;\n    opacity: 0.7;\n    white-space: nowrap;\n  }\n\n  \/* \u2500\u2500 Step counter \u2500\u2500 *\/\n  .blog-post-wrap .steps { counter-reset: step; list-style: none; padding: 0; margin: 1.75rem 0; }\n  .blog-post-wrap .steps li {\n    counter-increment: step;\n    display: flex;\n    gap: 16px;\n    margin-bottom: 2rem;\n    align-items: flex-start;\n  }\n  .blog-post-wrap .steps li::before {\n    content: counter(step);\n    flex-shrink: 0;\n    width: 28px; height: 28px;\n    background: var(--accent);\n    color: white;\n    font-family: 'JetBrains Mono', monospace;\n    font-size: 13px;\n    font-weight: 500;\n    display: flex;\n    align-items: center;\n    justify-content: center;\n    border-radius: 50%;\n    margin-top: 2px;\n  }\n  .blog-post-wrap .steps li > div { flex: 1; }\n  .blog-post-wrap .steps li > div strong { display: block; font-weight: 500; margin-bottom: 4px; }\n\n  \/* \u2500\u2500 Divider \u2500\u2500 
*\/\n  .blog-post-wrap .divider {\n    border: none;\n    height: 1px;\n    background: var(--border);\n    margin: 3rem 0;\n  }\n\n  \/* \u2500\u2500 Footer note \u2500\u2500 *\/\n  .blog-post-wrap .footnote {\n    font-size: 14px;\n    opacity: 0.5;\n    border-top: 1px solid var(--border);\n    padding-top: 1.5rem;\n    margin-top: 3rem;\n    font-family: 'JetBrains Mono', monospace;\n  }\n<\/style>\n<\/head>\n<body>\n\n<div class=\"blog-post-wrap\">\n\n<div class=\"hero\">\n  <div class=\"hero-inner\">\n    <div class=\"hero-tag\">Homelab \u00b7 AI Tools<\/div>\n    <h1>Running Claude Code Fully Local \u2014 No API Costs, No Cloud<\/h1>\n    <p class=\"hero-sub\">How to wire up the Claude Code CLI to a local Ollama instance on your home network, with a one-shot .bat installer for Windows.<\/p>\n    <div class=\"hero-meta\">\n      <span>May 2026<\/span>\n      <span>7 min read<\/span>\n      <span>Windows \u00b7 Ollama \u00b7 RTX 4070 Ti<\/span>\n    <\/div>\n  <\/div>\n<\/div>\n\n<div class=\"wrapper\">\n<div class=\"prose\">\n\n<p>Claude Code is Anthropic&#8217;s agentic CLI \u2014 it reads your codebase, writes diffs, runs commands, and reasons across multiple files at once. The catch: by default it phones home to Anthropic&#8217;s API on every request, which adds up fast. If you&#8217;re running a decent GPU at home, there&#8217;s no reason to pay for that.<\/p>\n\n<p>This guide walks through pointing Claude Code at a local <strong>Ollama<\/strong> instance instead \u2014 completely offline, zero API costs \u2014 and wraps the whole thing into a single <code>.bat<\/code> script you can run once and forget.<\/p>\n\n<div class=\"callout info\">\n  <div class=\"callout-title\">Setup at a glance<\/div>\n  <p>Claude Code CLI on Windows CMD \u2192 Ollama on 192.168.1.10:11434 \u2192 RTX 4070 Ti (12 GB VRAM) + 64 GB RAM. No WSL, no Docker, no cloud.<\/p>\n<\/div>\n\n<h2>Why this works<\/h2>\n\n<p>Claude Code speaks the Anthropic Messages API format. 
Ollama (v0.14+) natively understands that same format. So you can redirect Claude Code&#8217;s outbound requests to your local Ollama server by setting two environment variables:<\/p>\n\n<div class=\"code-block\">\n  <div class=\"code-block-header\"><span class=\"code-block-lang\">cmd<\/span><\/div>\n  <pre><span class=\"k\">ANTHROPIC_BASE_URL<\/span>=<span class=\"s\">http:\/\/192.168.1.10:11434<\/span>\n<span class=\"k\">ANTHROPIC_API_KEY<\/span>=<span class=\"s\">ollama<\/span>   <span class=\"c\">:: required but ignored by Ollama<\/span><\/pre>\n<\/div>\n\n<p>That&#8217;s the entire trick. Everything else \u2014 the CLI, the file editing, the reasoning loop \u2014 stays identical. The model running under the hood is just different.<\/p>\n\n<h2>Hardware and model selection<\/h2>\n\n<p>With 12 GB of VRAM and 64 GB of system RAM, you have two practical options:<\/p>\n\n<table class=\"spec-table\">\n  <thead>\n    <tr><th>Model<\/th><th>Quantization<\/th><th>VRAM<\/th><th>RAM offload<\/th><th>Speed<\/th><\/tr>\n  <\/thead>\n  <tbody>\n    <tr>\n      <td>qwen2.5-coder:14b<\/td>\n      <td>Q4_K_M (~9 GB)<\/td>\n      <td>fits fully<\/td>\n      <td>none<\/td>\n      <td>fast (~30\u201340 tok\/s)<\/td>\n    <\/tr>\n    <tr>\n      <td>qwen2.5-coder:32b<\/td>\n      <td>Q4_K_M (~20 GB)<\/td>\n      <td>~12 GB<\/td>\n      <td>~8 GB to RAM<\/td>\n      <td>moderate (~10\u201320 tok\/s)<\/td>\n    <\/tr>\n  <\/tbody>\n<\/table>\n\n<p>The 32B model produces noticeably better multi-file diffs and follows complex instructions more reliably. The offloading to RAM adds latency but is fully functional with 64 GB available. For most homelab coding tasks, the 32B is worth it.<\/p>\n\n<div class=\"callout\">\n  <div class=\"callout-title\">Context window matters<\/div>\n  <p>Claude Code needs a large context window to hold your entire codebase. 
Configure Ollama to use at least 64k tokens, otherwise it will truncate mid-session and produce broken output.<\/p>\n<\/div>\n\n<p>You do this with a custom Modelfile:<\/p>\n\n<div class=\"code-block\">\n  <div class=\"code-block-header\"><span class=\"code-block-lang\">powershell<\/span><\/div>\n  <pre><span class=\"c\"># Create Modelfile<\/span>\n<span class=\"v\">@\"\nFROM qwen2.5-coder:32b\nPARAMETER num_ctx 65536\n\"@<\/span> | Out-File -Encoding utf8 Modelfile\n\nollama create <span class=\"s\">qwen-coder-64k<\/span> -f Modelfile<\/pre>\n<\/div>\n\n<h2>Step-by-step setup<\/h2>\n\n<p>Here is the manual version. If you want the automated script, skip to the next section.<\/p>\n\n<ol class=\"steps\">\n  <li>\n    <div>\n      <strong>Expose Ollama on your network<\/strong>\n      By default Ollama only listens on <code>localhost<\/code>. Set <code>OLLAMA_HOST=0.0.0.0:11434<\/code> as a Windows system environment variable on the machine running Ollama, then restart the Ollama service.\n    <\/div>\n  <\/li>\n  <li>\n    <div>\n      <strong>Pull and configure the model<\/strong>\n      Run <code>ollama pull qwen2.5-coder:32b<\/code> on the Ollama machine, then create the 64k context variant with the Modelfile above.\n    <\/div>\n  <\/li>\n  <li>\n    <div>\n      <strong>Install Claude Code CLI<\/strong>\n      Requires Node.js. From any CMD window: <code>npm install -g @anthropic-ai\/claude-code<\/code>\n    <\/div>\n  <\/li>\n  <li>\n    <div>\n      <strong>Set environment variables<\/strong>\n      Use <code>setx<\/code> to make them persistent across sessions. Also set <code>CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1<\/code> to prevent telemetry calls to Anthropic.\n    <\/div>\n  <\/li>\n  <li>\n    <div>\n      <strong>Bypass the onboarding screen<\/strong>\n      Claude Code prompts you to log into Anthropic on first run. 
Skip it by creating <code>%USERPROFILE%\\.claude.json<\/code> with <code>\"hasCompletedOnboarding\": true<\/code>.\n    <\/div>\n  <\/li>\n  <li>\n    <div>\n      <strong>Pre-warm the model<\/strong>\n      Send a dummy request to Ollama before starting Claude Code. Ollama loads models lazily \u2014 without pre-warming, the first Claude Code request will time out while waiting for the model to load into VRAM.\n    <\/div>\n  <\/li>\n<\/ol>\n\n<h2>The automated .bat script<\/h2>\n\n<p>Rather than doing all of this by hand every time you set up a new machine, you can run a single <code>setup-claude-code.bat<\/code> file that packages the steps above. Run it once as Administrator.<\/p>\n\n<p>What it does, in order:<\/p>\n\n<ul>\n  <li>Pings Ollama on <code>192.168.1.10:11434<\/code> and warns if unreachable<\/li>\n  <li>Lists available models and asks which one to use<\/li>\n  <li>Installs <code>@anthropic-ai\/claude-code<\/code> via npm<\/li>\n  <li>Sets all required environment variables with <code>setx<\/code> (persistent, user-scope)<\/li>\n  <li>Creates or merges <code>%USERPROFILE%\\.claude.json<\/code> with the onboarding bypass<\/li>\n  <li>Pre-warms the selected model via a test API call<\/li>\n<\/ul>\n\n<div class=\"code-block\">\n  <div class=\"code-block-header\"><span class=\"code-block-lang\">bat \u2014 excerpt<\/span><\/div>\n  <pre><span class=\"c\">:: Set all vars persistently<\/span>\nsetx <span class=\"k\">ANTHROPIC_BASE_URL<\/span> <span class=\"s\">\"http:\/\/192.168.1.10:11434\"<\/span>\nsetx <span class=\"k\">ANTHROPIC_AUTH_TOKEN<\/span> <span class=\"s\">\"ollama\"<\/span>\nsetx <span class=\"k\">ANTHROPIC_API_KEY<\/span> <span class=\"s\">\"ollama\"<\/span>\nsetx <span class=\"k\">CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC<\/span> <span class=\"s\">\"1\"<\/span>\nsetx <span class=\"k\">CLAUDE_CODE_DEFAULT_MODEL<\/span> <span class=\"s\">\"%MODEL%\"<\/span>\n\n<span class=\"c\">:: Onboarding bypass via Node<\/span>\nnode -e <span 
class=\"s\">\"const fs=require('fs'); let d={}; try{d=JSON.parse(fs.readFileSync(path))}catch(e){} d.hasCompletedOnboarding=true; fs.writeFileSync(path,JSON.stringify(d,null,2))\"<\/span><\/pre>\n<\/div>\n\n<p>After running the script, restart CMD so the <code>setx<\/code> variables take effect, then simply type <code>claude<\/code> from any project folder.<\/p>\n\n<hr class=\"divider\">\n\n<h2>Daily usage<\/h2>\n\n<p>Once set up, usage is identical to the cloud-backed version. Navigate to a project folder in CMD and run:<\/p>\n\n<div class=\"code-block\">\n  <div class=\"code-block-header\"><span class=\"code-block-lang\">cmd<\/span><\/div>\n  <pre>cd C:\\projects\\my-app\nclaude<\/pre>\n<\/div>\n\n<p>Claude Code will index your project, and you can give it instructions like you normally would. The requests go to <code>192.168.1.10:11434<\/code> instead of Anthropic&#8217;s servers. No internet required, no usage costs, no rate limits.<\/p>\n\n<h2>Known limitations<\/h2>\n\n<p>Local models are not Claude. Qwen 2.5 Coder 32B is excellent for its size, but it will occasionally struggle with:<\/p>\n\n<ul>\n  <li><strong>Large multi-file refactors<\/strong> \u2014 the model may lose track of context across many files simultaneously<\/li>\n  <li><strong>Complex architectural decisions<\/strong> \u2014 reasoning depth is shallower than Claude Sonnet or Opus<\/li>\n  <li><strong>First response latency<\/strong> \u2014 even with pre-warming, the first response after a long idle period takes a few seconds while layers reload<\/li>\n<\/ul>\n\n<p>For homelab work, personal projects, and routine coding tasks these limitations rarely matter. 
For large-scale production refactors where you need the best possible result, a hybrid setup \u2014 local for exploration, real Claude API for the final pass \u2014 is worth considering.<\/p>\n\n<div class=\"callout\">\n  <div class=\"callout-title\">Download<\/div>\n  <p>The full <code>setup-claude-code.bat<\/code> script is available in the GitHub repository linked below. Change the Ollama IP on line 8 if yours differs from 192.168.1.10.<\/p>\n<\/div>\n\n<p class=\"footnote\">\/\/ tested on Windows 11 \u00b7 Ollama 0.14+ \u00b7 Claude Code CLI 1.x \u00b7 qwen2.5-coder:32b Q4_K_M<\/p>\n\n<\/div>\n<\/div>\n\n<\/div><!-- \/.blog-post-wrap -->\n\n<\/body>\n<\/html>\n","protected":false},"excerpt":{"rendered":"<p>Running Claude Code Fully Local \u2014 No API Costs, No Cloud Homelab \u00b7 AI Tools Running Claude Code Fully Local \u2014 No API Costs, No Cloud How to wire up the Claude Code CLI to a local Ollama instance on your home network, with a one-shot .bat installer for Windows. May 2026 7 min read 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":785,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[7,8],"tags":[],"class_list":["post-779","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-homelab","category-ai-tools"],"blocksy_meta":[],"_links":{"self":[{"href":"https:\/\/cyb3rjan.com\/index.php\/wp-json\/wp\/v2\/posts\/779","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/cyb3rjan.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/cyb3rjan.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/cyb3rjan.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/cyb3rjan.com\/index.php\/wp-json\/wp\/v2\/comments?post=779"}],"version-history":[{"count":2,"href":"https:\/\/cyb3rjan.com\/index.php\/wp-json\/wp\/v2\/posts\/779\/revisions"}],"predecessor-version":[{"id":824,"href":"https:\/\/cyb3rjan.com\/index.php\/wp-json\/wp\/v2\/posts\/779\/revisions\/824"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/cyb3rjan.com\/index.php\/wp-json\/wp\/v2\/media\/785"}],"wp:attachment":[{"href":"https:\/\/cyb3rjan.com\/index.php\/wp-json\/wp\/v2\/media?parent=779"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/cyb3rjan.com\/index.php\/wp-json\/wp\/v2\/categories?post=779"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/cyb3rjan.com\/index.php\/wp-json\/wp\/v2\/tags?post=779"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}