The setup:
Mac Mini M2
16 GB of RAM
- Go to https://ollama.com/, download the macOS installer, and just click through the installation.
- After installing, open a terminal, run `ollama run deepseek-r1:14b`, and wait for the model download to finish.
- Once the download finishes you can already chat with the model directly in the terminal. For personal use there is really no need to deploy anything further, but after an afternoon of tinkering I ended up making it accessible over the local network.
The whole process was worked out by asking an AI.
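Before adding a web front end, it's worth a quick check that the local Ollama API responds at all. The snippet below is only a sketch (it assumes the default port 11434, the `deepseek-r1:14b` model pulled above, and uses the `requests` library):

```python
# Minimal sanity check of the local Ollama API (assumes the default port 11434
# and the deepseek-r1:14b model pulled above).
import requests

resp = requests.post(
    "http://localhost:11434/api/generate",
    json={"model": "deepseek-r1:14b", "prompt": "Say hello in one sentence.", "stream": False},
    timeout=300,  # the 14b model can take a while to load and answer
)
resp.raise_for_status()
print(resp.json()["response"])
```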
The Python code for the LAN-facing service is as follows:
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse
from sse_starlette.sse import EventSourceResponse
import aiohttp
import json
import asyncio
from random import choice
from typing import Dict, List
from uuid import uuid4
from fastapi.middleware.cors import CORSMiddleware
app = FastAPI()
# Add CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Addresses of the Ollama instances
OLLAMA_INSTANCES = [
"http://localhost:11434/api/generate",
# "http://localhost:11435/api/generate",
# "http://localhost:11436/api/generate",
]
# Conversation history for each user
user_contexts: Dict[str, Dict[str, List]] = {}  # {session_id: {"history": [], "ip": str}}
# GET route that serves the chat form
@app.get("/", response_class=HTMLResponse)
async def get_form():
return """
<html>
<head>
<title>DeepSeek-R1对话</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 20px;
}
#chat-history {
height: 500px;
border: 1px solid #ccc;
padding: 10px;
margin-bottom: 20px;
overflow-y: auto;
}
.user-message {
background-color: #f0f0f0;
margin: 10px 0;
padding: 8px;
border-radius: 5px;
}
.assistant-message {
background-color: #e3f2fd;
margin: 10px 0;
padding: 8px;
border-radius: 5px;
}
pre {
background-color: #f5f5f5;
padding: 10px;
border-radius: 5px;
overflow-x: auto;
white-space: pre-wrap; /* make code wrap */
word-wrap: break-word; /* break long words or long code lines */
}
code {
font-family: Consolas, "Courier New", monospace;
font-size: 14px;
color: #333;
}
</style>
</head>
<body>
<h2>DeepSeek-R1对话</h2>
<div id="chat-history"></div>
<form id="query-form" onsubmit="submitQuery(event)">
<input type="text" id="query" name="query" placeholder="输入你的问题" style="width: 300px;">
<input type="submit" value="提交">
</form>
<script>
let sessionId = null;
let currentResponse = null;
// Initialize the session
fetch('/init_session')
.then(response => response.json())
.then(data => {
sessionId = data.session_id;
// Could also be stored in localStorage for persistence
localStorage.setItem('llm_session', sessionId);
});
function appendMessage(role, content) {
const chatHistory = document.getElementById('chat-history');
const div = document.createElement('div');
div.className = role === 'user' ? 'user-message' : 'assistant-message';
if (role === 'assistant' && content.startsWith('```')) {
// If it is a code block, wrap it in <pre> and <code> tags
div.innerHTML = `<strong>${role === 'user' ? '用户' : '助手'}:</strong> <pre><code>${content}</code></pre>`;
} else {
div.innerHTML = `<strong>${role === 'user' ? '用户' : '助手'}:</strong> ${content}`;
}
chatHistory.appendChild(div);
chatHistory.scrollTop = chatHistory.scrollHeight; // auto-scroll to the bottom
}
function submitQuery(event) {
event.preventDefault();
const query = document.getElementById('query').value;
appendMessage('user', query);
const source = new EventSource(`/stream?query=${encodeURIComponent(query)}&session=${sessionId}`);
source.onmessage = function(event) {
const data = JSON.parse(event.data);
if (!data.done) {
if (!currentResponse) currentResponse = '';
currentResponse += data.chunk;
// Update the last assistant message
const lastAssistantMsg = document.querySelector('#chat-history div:last-child');
if (lastAssistantMsg && lastAssistantMsg.innerText.startsWith('助手')) {
lastAssistantMsg.innerHTML = `<strong>助手:</strong> <pre><code>${currentResponse}</code></pre>`;
} else {
appendMessage('assistant', currentResponse);
}
}
if (data.done) {
currentResponse = null;
source.close();
}
};
source.onerror = function() {
document.getElementById('chat-history').innerHTML += '<div class="error-message">错误:无法获取响应</div>';
source.close();
};
}
</script>
</body>
</html>
"""
# Initialize a session
@app.get("/init_session")
async def init_session(request: Request):
session_id = str(uuid4())
user_contexts[session_id] = {
"history": [],
"ip": request.client.host
}
return {"session_id": session_id}
# Route that streams the answer to a user query
@app.get("/stream")
async def stream_query(query: str, session: str = None):
async def event_generator():
nonlocal session
        # Validate the session
if not session or session not in user_contexts:
yield {"event": "error", "data": json.dumps({"chunk": "无效会话", "done": True})}
return
        # Fetch this session's context
context_obj = user_contexts[session]
context_obj["history"].append({"role": "user", "content": query})
        # Keep the history bounded (trim to the last 5 messages once it exceeds 10)
if len(context_obj["history"]) > 10:
context_obj["history"] = context_obj["history"][-5:]
        # Build the prompt from the structured history
prompt = "\n".join(
[f"{msg['role']}: {msg['content']}"
for msg in context_obj["history"]]
) + "\nassistant:"
        # Randomly pick one of the Ollama instances
ollama_url = choice(OLLAMA_INSTANCES)
full_response = ""
retry_count = 0
        while retry_count < 3:  # retry up to 3 times on connection errors
try:
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session_client:
payload = {
"model": "deepseek-r1:14b",
"prompt": prompt,
"stream": True,
"options": {
"temperature": 0.7,
"max_tokens": 2048,
"repeat_penalty": 1.2
}
}
async with session_client.post(ollama_url, json=payload) as response:
                        async for chunk in response.content.iter_any():  # iter_any() is more robust for streamed chunks
if chunk:
try:
decoded_chunk = chunk.decode('utf-8').strip()
if decoded_chunk:
data = json.loads(decoded_chunk)
chunk_text = data.get("response", "")
if chunk_text:
full_response += chunk_text
yield {
"event": "message",
"data": json.dumps({
"chunk": chunk_text,
"done": False
})
}
                                            await asyncio.sleep(0.001)  # tiny delay so other tasks can run
except json.JSONDecodeError:
continue
break # 成功完成则退出循环
except (aiohttp.ClientError, asyncio.TimeoutError) as e:
retry_count += 1
if retry_count >= 3:
yield {"event": "error", "data": json.dumps({"chunk": f"连接Ollama服务失败: {str(e)}", "done": True})}
return
                await asyncio.sleep(1)  # wait before retrying
        # Save the complete response to the history
context_obj["history"].append({
"role": "assistant",
"content": full_response
})
yield {"event": "message", "data": json.dumps({"chunk": "", "done": True})}
return EventSourceResponse(event_generator())
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8001)
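Once the script is running, any device on the LAN can simply open http://<mac-mini-ip>:8001/ in a browser. For a quick test without a browser, a rough client sketch is below (the address is a placeholder for the Mac Mini's LAN IP; it just reads the SSE `data:` lines emitted by the `/stream` route):

```python
# Hypothetical LAN test client for the FastAPI service above.
# Replace 192.168.1.10 with the Mac Mini's actual LAN address.
import json
import requests

BASE = "http://192.168.1.10:8001"

# Create a session, then stream one answer.
session_id = requests.get(f"{BASE}/init_session", timeout=10).json()["session_id"]

with requests.get(
    f"{BASE}/stream",
    params={"query": "hello", "session": session_id},
    stream=True,
    timeout=300,
) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        # SSE lines of interest look like: data: {"chunk": "...", "done": false}
        if line and line.startswith("data:"):
            payload = json.loads(line[len("data:"):].strip())
            if payload["done"]:
                break
            print(payload["chunk"], end="", flush=True)
```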
Once the code above is running, things actually work fine, but there is one problem: if users A and B ask questions at the same time, the model finishes answering A before B's answer even starts streaming. The solution the AI gave me was to run several Ollama instances on different ports and pick one at random for each request. The default port is 11434; start as many extra instances as the number of concurrent users you need. In practice, though, this little machine tops out at 2 simultaneous users: with 3 it gets very laggy, and one more grinds it to a halt.
Open a terminal, run the commands below, and leave it running; open one terminal (with its own port) per additional instance you need.
# Set the environment variable
export OLLAMA_HOST="0.0.0.0:11435"
# Start the second instance
ollama serve
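With the second instance listening on 11435, the only change needed in the script is to add its URL to `OLLAMA_INSTANCES` (uncomment the extra entries). Optionally, a small startup check like the sketch below can drop any instance that isn't actually up, so `choice()` never picks a dead port; it only assumes that a running Ollama server answers a plain GET on its base URL:

```python
# Optional sketch: probe each configured Ollama instance and keep only the
# ones that respond.
import requests

OLLAMA_INSTANCES = [
    "http://localhost:11434/api/generate",
    "http://localhost:11435/api/generate",
]

def live_instances(urls, timeout=2):
    alive = []
    for url in urls:
        base = url.split("/api/")[0]  # e.g. http://localhost:11434
        try:
            requests.get(base, timeout=timeout)  # a running server replies "Ollama is running"
            alive.append(url)
        except requests.RequestException:
            continue
    return alive

print(live_instances(OLLAMA_INSTANCES))
```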
That about covers everything else. I completely forgot to document the process at the time, so even putting together this memo now took some effort.
`ollama list`: shows all of the models currently installed.
`ollama rm`: deletes a model.
For example, to delete the model named `deepseek-r1` with the `1.5b` tag, run: `ollama rm deepseek-r1:1.5b`
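The same information is also available over HTTP, which is handy when managing the machine from elsewhere on the LAN. A small sketch using Ollama's `/api/tags` endpoint, roughly the programmatic counterpart of `ollama list`:

```python
# List the locally installed models via the Ollama HTTP API (GET /api/tags).
import requests

resp = requests.get("http://localhost:11434/api/tags", timeout=10)
resp.raise_for_status()
for model in resp.json().get("models", []):
    print(model["name"])
```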