用 LangChain 建立對話式 API

前言

如果你想建一個有 AI 能力的後端 API — 比如客服機器人、文件問答系統、或是智慧助手 — LangChain 是目前最主流的框架。它幫你把 LLM、Prompt、記憶、工具呼叫這些東西封裝好，讓你可以用比較結構化的方式組裝 AI 應用。

老實說，LangChain 的學習曲線不算低，抽象層很多，文件也經常跟不上程式碼的更新速度。但一旦搞懂它的核心概念，開發效率確實會提升很多。這篇文章會帶你從 LangChain 的核心概念開始，一路做到一個完整的對話式 API，搭配 FastAPI 部署。

LangChain 核心概念

架構總覽

LangChain 核心元件:

[LLM/ChatModel]     最底層，負責文字生成
       │
[PromptTemplate]    管理 prompt 的模板
       │
[Chain]              把多個步驟串起來
       │
[Agent]              有「決策能力」的 Chain，可以選擇用什麼工具
       │
[Tool]               Agent 可以呼叫的外部功能（搜尋、計算、API...）
       │
[Memory]             對話記憶，讓 AI 記住上下文

安裝

# 基本安裝
pip install langchain langchain-community langchain-core

# 搭配 OpenAI
pip install langchain-openai

# 搭配 Ollama（本地 LLM）
pip install langchain-ollama

# 搭配 FastAPI
pip install fastapi uvicorn pydantic

# 完整安裝
pip install langchain langchain-community langchain-openai \
    langchain-ollama fastapi uvicorn pydantic redis

從基礎開始

基本的 LLM 呼叫

from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage, SystemMessage

# 初始化 LLM（使用本地 Ollama）
llm = ChatOllama(
    model="llama3.1",
    temperature=0.7,
    base_url="http://localhost:11434",
)

# 最簡單的呼叫
response = llm.invoke("用一句話解釋什麼是 Docker")
print(response.content)

# 帶系統 prompt 的呼叫
messages = [
    SystemMessage(content="你是一位資深的後端工程師，用繁體中文簡潔地回答問題。"),
    HumanMessage(content="Redis 的主要用途有哪些？"),
]
response = llm.invoke(messages)
print(response.content)

Prompt Template

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# 基本模板
prompt = ChatPromptTemplate.from_messages([
    ("system", "你是一位 {role}，擅長 {specialty}。用繁體中文回答。"),
    ("human", "{question}"),
])

# 使用模板
chain = prompt | llm
response = chain.invoke({
    "role": "後端工程師",
    "specialty": "分散式系統設計",
    "question": "設計一個高可用的訊息佇列系統要考慮什麼？"
})
print(response.content)

Chain：串接多個步驟

from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Example: a code-review chain
# Step 1: analyze the code
analyze_prompt = ChatPromptTemplate.from_messages([
    ("system", "你是一位程式碼審查專家。分析以下程式碼的潛在問題。"),
    # Wrap the snippet in a Markdown fence tagged with its language so the
    # model can tell the code under review apart from the instructions.
    ("human", "```{language}\n{code}\n```"),
])


# Step 2: 生成改善建議

suggest_prompt = ChatPromptTemplate.from_messages([

    ("system", "根據以下程式碼分析結果，提供具體的改善建議和修改後的程式碼。"),

    ("human", "原始分析：\n{analysis}\n\n請提供改善建議。"),

])
# 串接

analysis_chain = analyze_prompt | llm | StrOutputParser()

suggestion_chain = suggest_prompt | llm | StrOutputParser()
# 完整 chain

def code_review(language: str, code: str) -> str:
    """Run the two-stage review and return the improvement suggestions.

    The snippet is first sent through ``analysis_chain``; the analysis text
    it produces is then passed to ``suggestion_chain``, whose output is
    returned unchanged.
    """
    review_input = {"language": language, "code": code}
    findings = analysis_chain.invoke(review_input)
    return suggestion_chain.invoke({"analysis": findings})
# 使用

result = code_review("python", """

def get_user(id):

    conn = psycopg2.connect("dbname=mydb user=admin password=123")

    cur = conn.cursor()

    cur.execute(f"SELECT * FROM users WHERE id = {id}")

    return cur.fetchone()

""")

print(result)

Agent 和 Tool

Agent 是 LangChain 最強大的功能之一。它讓 LLM 自己決定要使用哪些工具來完成任務。

from langchain.agents import AgentExecutor, create_tool_calling_agent

from langchain_core.tools import tool

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

import subprocess

import httpx
# 定義工具

@tool

def run_python(code: str) -> str:
    """執行 Python 程式碼並回傳結果。用於需要計算或資料處理的場景。"""
    # The docstring above is left verbatim: LangChain's @tool decorator uses
    # it as the tool description shown to the model, so it is runtime text.
    #
    # Runs `code` in a child interpreter (`-c`) with a 10-second timeout.
    # Returns the child's stdout on exit code 0 (or a placeholder when stdout
    # is empty), "錯誤：<stderr>" on a non-zero exit code, and a timeout
    # message when the limit is exceeded.
    #
    # SECURITY: `code` is chosen by the LLM and executed without a sandbox.
    # Do not expose this tool to untrusted input without isolating it
    # (container, unprivileged user, resource limits).
    import sys

    # Use the interpreter running this process so the tool works inside
    # virtualenvs and on platforms that have no `python3` executable on PATH.
    interpreter = sys.executable or "python3"
    try:
        completed = subprocess.run(
            [interpreter, "-c", code],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except subprocess.TimeoutExpired:
        return "錯誤：執行超時"

    if completed.returncode != 0:
        return f"錯誤：{completed.stderr}"
    return completed.stdout or "（執行成功，無輸出）"
@tool

def search_docs(query: str) -> str:
    """搜尋內部技術文件。用於回答關於專案架構、API 規格等問題。"""
    # Docstring kept verbatim: the @tool decorator on this function uses it
    # as the tool description, so it is runtime text.
    #
    # Simplified stand-in for a real retrieval (RAG) backend: every topic
    # whose keyword occurs in the lower-cased query contributes its line.
    knowledge_base = {
        "api": "我們的 API 使用 REST 架構，基本路徑是 /api/v1/",
        "auth": "認證使用 JWT token，有效期限 24 小時",
        "database": "主資料庫是 PostgreSQL 15，快取用 Redis 7",
    }
    needle = query.lower()
    hits = [text for topic, text in knowledge_base.items() if topic in needle]
    if not hits:
        return "找不到相關文件"
    return "\n".join(hits)
@tool

def check_service_status(service_name: str) -> str:
    """檢查服務的運行狀態。可以檢查 api、database、cache 等服務。"""
    # Docstring kept verbatim: the @tool decorator on this function uses it
    # as the tool description, so it is runtime text.
    #
    # Simulated status table; the lookup is case-insensitive, and an unknown
    # name is echoed back exactly as the caller wrote it.
    simulated = {
        "api": "running (response time: 45ms)",
        "database": "running (connections: 23/100)",
        "cache": "running (memory: 256MB/1GB)",
    }
    try:
        return simulated[service_name.lower()]
    except KeyError:
        return f"未知服務: {service_name}"
# 建立 Agent

tools = [run_python, search_docs, check_service_status]
agent_prompt = ChatPromptTemplate.from_messages([

    ("system", """你是一位 DevOps 助手，可以幫忙查詢系統狀態、搜尋文件、

    和執行簡單的計算。用繁體中文回答。

    在回答之前，請先使用適當的工具獲取資訊。"""),

    MessagesPlaceholder(variable_name="chat_history", optional=True),

    ("human", "{input}"),

    MessagesPlaceholder(variable_name="agent_scratchpad"),

])
# 注意：tool calling agent 需要支援 tool calling 的模型

# 如果用 Ollama，確保模型支援（如 llama3.1）

agent = create_tool_calling_agent(llm, tools, agent_prompt)

agent_executor = AgentExecutor(

    agent=agent,

    tools=tools,

    verbose=True,   # 印出思考過程

    max_iterations=5,

)
# 使用

result = agent_executor.invoke({

    "input": "檢查一下所有服務的狀態，然後告訴我資料庫的連線數是否正常"

})

print(result["output"])

對話記憶

基本記憶

from langchain_community.chat_message_histories import ChatMessageHistory

from langchain_core.runnables.history import RunnableWithMessageHistory
# 記憶存儲（In-Memory）

store = {}
def get_session_history(session_id: str):
    """Return the chat history for ``session_id``, creating it on first use.

    Histories are kept in the module-level ``store`` dict, so repeated calls
    with the same id return the same object.
    """
    try:
        return store[session_id]
    except KeyError:
        history = store[session_id] = ChatMessageHistory()
        return history
# 帶記憶的 chain

prompt_with_history = ChatPromptTemplate.from_messages([

    ("system", "你是一位友善的技術助手。用繁體中文回答。"),

    MessagesPlaceholder(variable_name="history"),

    ("human", "{input}"),

])
chain = prompt_with_history | llm | StrOutputParser()
chain_with_history = RunnableWithMessageHistory(

    chain,

    get_session_history,

    input_messages_key="input",

    history_messages_key="history",

)
# 多輪對話

config = {"configurable": {"session_id": "user-123"}}
r1 = chain_with_history.invoke({"input": "我叫小明"}, config=config)

print(r1)  # "你好小明！..."
r2 = chain_with_history.invoke({"input": "我叫什麼名字？"}, config=config)

print(r2)  # "你叫小明..."

Redis 持久化記憶

from langchain_community.chat_message_histories import RedisChatMessageHistory
def get_redis_history(session_id: str):
    """Build a Redis-backed chat history for ``session_id``.

    Uses database 0 of the Redis server on localhost:6379 and passes a TTL
    of 3600 seconds (one hour) to the history object.
    """
    redis_url = "redis://localhost:6379/0"
    one_hour = 3600
    history = RedisChatMessageHistory(
        session_id=session_id,
        url=redis_url,
        ttl=one_hour,
    )
    return history
# 其他程式碼跟上面一樣，只是換了 history 的 backend

chain_with_redis = RunnableWithMessageHistory(

    chain,

    get_redis_history,

    input_messages_key="input",

    history_messages_key="history",

)

整合 FastAPI

這是最終的目標 — 把所有東西包成一個 REST API：

# app.py

from fastapi import FastAPI, HTTPException

from pydantic import BaseModel

from contextlib import asynccontextmanager
from langchain_ollama import ChatOllama

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

from langchain_core.output_parsers import StrOutputParser

from langchain_core.runnables.history import RunnableWithMessageHistory

from langchain_community.chat_message_histories import ChatMessageHistory
# ─── 初始化 ───

store = {}
def get_session_history(session_id: str):
    """Look up the in-memory history for ``session_id``, creating it if absent.

    The history is registered in the module-level ``store`` dict so later
    requests for the same session reuse it.
    """
    try:
        history = store[session_id]
    except KeyError:
        history = ChatMessageHistory()
        store[session_id] = history
    return history
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: print a banner on startup and on shutdown.

    Code before ``yield`` runs when the context is entered; code after it
    runs when the context exits normally.
    """
    startup_banner, shutdown_banner = "AI Chat API 已啟動", "AI Chat API 已關閉"
    print(startup_banner)
    yield
    print(shutdown_banner)
app = FastAPI(title="AI Chat API", lifespan=lifespan)
# ─── LLM 設定 ───

llm = ChatOllama(

    model="llama3.1",

    temperature=0.7,

    base_url="http://localhost:11434",

)
prompt = ChatPromptTemplate.from_messages([

    ("system", """你是一位專業的技術助手。

    - 用繁體中文回答

    - 回答要簡潔但完整

    - 如果不確定，請明確告知

    - 提供程式碼範例時使用 markdown 格式"""),

    MessagesPlaceholder(variable_name="history"),

    ("human", "{input}"),

])
chain = prompt | llm | StrOutputParser()
chain_with_history = RunnableWithMessageHistory(

    chain,

    get_session_history,

    input_messages_key="input",

    history_messages_key="history",

)
# ─── API Models ───

class ChatRequest(BaseModel):
    # Request body accepted by the chat endpoints.

    # Text of the user's message.
    message: str

    # Conversation key; requests that omit it all use the literal "default".
    session_id: str = "default"
class ChatResponse(BaseModel):
    # Response body returned by the /chat endpoint.

    # Reply text produced by the chain.
    reply: str

    # Session id the reply belongs to (echoed from the request).
    session_id: str
class HistoryResponse(BaseModel):
    # Response body returned by the history endpoint.

    # Session id whose history was requested.
    session_id: str

    # One dict per stored message, with "role" and "content" keys.
    messages: list[dict]
# ─── API Endpoints ───

@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """對話端點"""
    # Docstring kept verbatim: FastAPI publishes it as the endpoint
    # description in the generated API docs.
    #
    # Sends the message through the history-aware chain for the caller's
    # session and returns the reply. Any failure is reported as HTTP 500
    # with the exception text as the detail.
    try:
        config = {"configurable": {"session_id": request.session_id}}
        # Await the async variant: the synchronous invoke() would block the
        # event loop for the whole LLM call and stall every other request.
        reply = await chain_with_history.ainvoke(
            {"input": request.message},
            config=config,
        )
        return ChatResponse(reply=reply, session_id=request.session_id)
    except Exception as e:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/history/{session_id}", response_model=HistoryResponse)
async def get_history(session_id: str):
    """取得對話歷史"""
    # Docstring kept verbatim: FastAPI publishes it as the endpoint
    # description in the generated API docs.
    #
    # Returns every stored message of the session as {"role", "content"}
    # dicts; an unknown session yields an empty list instead of an error.
    history = store.get(session_id)
    if history is None:
        return HistoryResponse(session_id=session_id, messages=[])

    messages = [
        {"role": msg.type, "content": msg.content}
        for msg in history.messages
    ]
    return HistoryResponse(session_id=session_id, messages=messages)
@app.delete("/history/{session_id}")
async def clear_history(session_id: str):
    """清除對話歷史"""
    # Docstring kept verbatim: FastAPI publishes it as the endpoint
    # description in the generated API docs.
    #
    # Drops the session from the in-memory store if it is there; the same
    # confirmation is returned whether or not the session existed.
    store.pop(session_id, None)
    return {"message": f"Session {session_id} cleared"}
@app.get("/health")
async def health():
    """健康檢查"""
    # Docstring kept verbatim: FastAPI publishes it as the endpoint
    # description in the generated API docs.
    #
    # Static liveness payload; the model name is a fixed string and is not
    # read from the configured LLM.
    payload = {"status": "ok", "model": "llama3.1"}
    return payload

啟動並測試 API（bash）：

# Start the API (runs in the foreground; use a second terminal for curl)
uvicorn app:app --host 0.0.0.0 --port 8000 --reload

# Send a first message
curl -X POST http://localhost:8000/chat \
  -H "Content-Type: application/json" \
  -d '{"message": "什麼是 Docker？", "session_id": "user-1"}'

# Continue the conversation in the same session
curl -X POST http://localhost:8000/chat \
  -H "Content-Type: application/json" \
  -d '{"message": "它跟虛擬機有什麼差別？", "session_id": "user-1"}'

# Show the conversation history
curl http://localhost:8000/history/user-1

Streaming 支援

from fastapi.responses import StreamingResponse

from langchain_core.output_parsers import StrOutputParser
@app.post("/chat/stream")
async def chat_stream(request: ChatRequest):
    """串流回應端點"""
    # Docstring kept verbatim: FastAPI publishes it as the endpoint
    # description in the generated API docs.
    #
    # Streams the reply as Server-Sent Events: one event per non-empty chunk
    # from the chain, followed by a final "[DONE]" sentinel event.
    config = {"configurable": {"session_id": request.session_id}}

    async def generate():
        async for chunk in chain_with_history.astream(
            {"input": request.message},
            config=config,
        ):
            if not chunk:
                continue
            # An SSE event may carry several `data:` lines. Emitting one per
            # text line keeps a newline inside a chunk from ending the event
            # early; chunks without newlines produce the same bytes as before.
            for line in chunk.split("\n"):
                yield f"data: {line}\n"
            yield "\n"

        yield "data: [DONE]\n\n"

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
    )

前端接收串流的範例（JavaScript）：

// Front end: read the /chat/stream Server-Sent Events response as it arrives.
const response = await fetch('/chat/stream', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ message: '解釋微服務架構', session_id: 'user-1' }),
});

// Browser code has no `process.stdout`; append the text to the page instead.
const render = (text) => document.body.append(text);

const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffered = '';    // holds a trailing partial line between reads
let finished = false; // set once the "[DONE]" sentinel is seen

while (!finished) {
  const { done, value } = await reader.read();
  if (done) break;

  // `stream: true` keeps multi-byte UTF-8 characters intact when one is
  // split across two network chunks.
  buffered += decoder.decode(value, { stream: true });

  // Only complete lines are processed; the last piece may be cut mid-line.
  const lines = buffered.split('\n');
  buffered = lines.pop();

  for (const line of lines) {
    if (!line.startsWith('data: ')) continue;
    const content = line.slice('data: '.length);
    if (content === '[DONE]') {
      // Leave the outer read loop too, not just this for-loop.
      finished = true;
      break;
    }
    render(content);
  }
}

小結

LangChain 的生態系很大，這篇文章只涵蓋了最核心的部分。但有了這些基礎，你已經能建立一個可用的對話式 API 了。

幾個實戰建議：

1. 從簡單的 Chain 開始，不要一上來就用 Agent
2. 記憶管理很重要，對話太長會超過 context window，需要做 summarization 或 sliding window
3. 錯誤處理不能省，LLM 的回應不穩定，API 要能優雅地處理各種異常
4. 監控和日誌，用 LangSmith 或自己的 logging 追蹤每次呼叫的 token 使用和回應品質

前言