Wiki source code of ИИ: Установка Qwen3.5-35B-A3B
Last modified by Алексей Александрович Иванов on 2026/03/06 08:07
Show last authors
| author | version | line-number | content |
|---|---|---|---|
| 1 | === 1️⃣ Установка llama.cpp === | ||
| 2 | |||
| 3 | bash | ||
| 4 | |||
| 5 | {{{apt-get update | ||
| 6 | apt-get install pciutils build-essential cmake curl libcurl4-openssl-dev git -y | ||
| 7 | git clone https://github.com/ggml-org/llama.cpp | ||
| 8 | cmake llama.cpp -B llama.cpp/build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=OFF | ||
| 9 | cmake --build llama.cpp/build --config Release -j --clean-first --target llama-cli llama-server | ||
| 10 | cp llama.cpp/build/bin/llama-* llama.cpp/}}} | ||
| 11 | |||
| 12 | === 2️⃣ Скачивание модели === | ||
| 13 | |||
| 14 | bash | ||
| 15 | |||
| 16 | {{{pip install huggingface_hub hf_transfer | ||
| 17 | export HF_HUB_ENABLE_HF_TRANSFER=1 | ||
| 18 | |||
| 19 | # Для 4-bit (рекомендуется) | ||
| 20 | huggingface-cli download unsloth/Qwen3.5-35B-A3B-GGUF \ | ||
| 21 | --local-dir ./qwen3.5-35b \ | ||
| 22 | --include "*UD-Q4_K_XL*"}}} | ||
| 23 | |||
| 24 | === 3️⃣ Запуск (выберите режим) === | ||
| 25 | |||
| 26 | **Режим мышления (thinking) — кодинг:** | ||
| 27 | |||
| 28 | bash | ||
| 29 | |||
| 30 | {{{cd llama.cpp | ||
| 31 | ./llama-cli \ | ||
| 32 | --model ../qwen3.5-35b/Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf \ | ||
| 33 | --ctx-size 16384 \ | ||
| 34 | --temp 0.6 \ | ||
| 35 | --top-p 0.95 \ | ||
| 36 | --top-k 20 \ | ||
| 37 | --min-p 0.00 \ | ||
| 38 | --interactive}}} | ||
| 39 | |||
| 40 | **Режим без мышления (non-thinking) — общие задачи:** | ||
| 41 | |||
| 42 | bash | ||
| 43 | |||
| 44 | {{{cd llama.cpp | ||
| 45 | ./llama-cli \ | ||
| 46 | --model ../qwen3.5-35b/Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf \ | ||
| 47 | --ctx-size 16384 \ | ||
| 48 | --temp 0.7 \ | ||
| 49 | --top-p 0.8 \ | ||
| 50 | --top-k 20 \ | ||
| 51 | --min-p 0.00 \ | ||
| 52 | --chat-template-kwargs '{"enable_thinking":false}' \ | ||
| 53 | --interactive}}} | ||
| 54 | |||
| 55 | === 4️⃣ Запуск веб-сервера === | ||
| 56 | |||
| 57 | bash | ||
| 58 | |||
| 59 | {{{cd llama.cpp | ||
| 60 | ./llama-server \ | ||
| 61 | --model ../qwen3.5-35b/Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf \ | ||
| 62 | --ctx-size 16384 \ | ||
| 63 | --temp 0.7 \ | ||
| 64 | --top-p 0.8 \ | ||
| 65 | --host 0.0.0.0 \ | ||
| 66 | --port 8080}}} | ||
| 67 | |||
| 68 | Веб-интерфейс будет доступен по адресу: http:~/~/localhost:8080 | ||
| 69 | |||
| 70 | === ⚡ Быстрый тест === | ||
| 71 | |||
| 72 | bash | ||
| 73 | |||
| 74 | {{{cd llama.cpp && echo 'Привет!' | ./llama-cli --model ../qwen3.5-35b/Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf --temp 0.7 --n-predict 100}}} | ||
| 75 | |||
| 76 | **Требования:** ~~22GB RAM для 4-bit версии. |