diff --git a/main.spec b/main.spec
index 6bf6bb6..1e5c364 100644
--- a/main.spec
+++ b/main.spec
@@ -5,7 +5,7 @@ a = Analysis(
     ['main.py'],
     pathex=[],
     binaries=[],
-    datas=[('icon.ico', '.'), ('cloudflared-windows-amd64.exe', '.')],
+    datas=[('icon.ico', '.'), ('data', '.'), ('cloudflared-windows-amd64.exe', '.')],
     hiddenimports=['tiktoken_ext.openai_public', 'tiktoken_ext', 'PySide6', 'PySide6.QtCore', 'PySide6.QtGui', 'qfluentwidgets'],
     hookspath=[],
     hooksconfig={},
diff --git a/requirements.txt b/requirements.txt
index 08f6c6a..0dec58f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,6 @@
 requests
 py7zr
 aiohttp
-pydantic
 
 # for dist packing
 pyinstaller
diff --git a/src/native/.gitignore b/src/native/.gitignore
deleted file mode 100644
index 14e7f13..0000000
--- a/src/native/.gitignore
+++ /dev/null
@@ -1,56 +0,0 @@
-compile_commands.json
-.cache/
-
-# Xmake cache
-.xmake/
-build/
-
-# MacOS Cache
-.DS_Store
-
-
-
-
-### Created by https://www.gitignore.io
-### xmake ###
-# Xmake cache
-.xmake/
-build/
-
-
-
-### Created by https://www.gitignore.io
-### C++ ###
-# Prerequisites
-*.d
-
-# Compiled Object files
-*.slo
-*.lo
-*.o
-*.obj
-
-# Precompiled Headers
-*.gch
-*.pch
-
-# Compiled Dynamic libraries
-*.so
-*.dylib
-*.dll
-
-# Fortran module files
-*.mod
-*.smod
-
-# Compiled Static libraries
-*.lai
-*.la
-*.a
-*.lib
-
-# Executables
-*.exe
-*.out
-*.app
-
diff --git a/src/native/src/lib.cpp b/src/native/src/lib.cpp
deleted file mode 100644
index 3f6b856..0000000
--- a/src/native/src/lib.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-#include <dxgi.h>
-#include <iostream>
-#include <cstdint>
-
-
-extern "C" {
-    enum class RetCode {
-        Success = 0,
-        WinApiInvokeFailed,
-    };
-
-    struct GpuDesc {
-        wchar_t name[128];
-        // all in bytes
-        size_t dedicated_gpu_memory;
-        size_t dedicated_system_memory;
-        size_t shared_system_memory;
-
-        int64_t current_gpu_memory_usage; // -1 means not available
-    };
-
-    __declspec(dllexport) RetCode get_all_gpus(IN GpuDesc* buf, IN size_t max_count, OUT size_t* gpu_count) {
-        IDXGIFactory1* pFactory;
-        if(auto hr = CreateDXGIFactory1(IID_PPV_ARGS(&pFactory)); hr!= S_OK) {
-            std::cerr << "Failed to create DXGI factory: " << std::hex << hr << std::endl;
-            return RetCode::WinApiInvokeFailed;
-        }
-
-        for (int i = 0; i < max_count; ++i) {
-            IDXGIAdapter1* pAdapter;
-            DXGI_ADAPTER_DESC1 desc;
-
-            if (auto hr = pFactory->EnumAdapters1(i, &pAdapter); hr!= S_OK) {
-                if (hr == DXGI_ERROR_NOT_FOUND) {
-                    // Have gone through all adapters
-                    break;
-                }
-                std::cerr << "Failed to EnumAdapters: " << hr << std::endl;
-                return RetCode::WinApiInvokeFailed;
-            };
-
-            if (auto hr = pAdapter->GetDesc1(&desc); hr != S_OK) {
-                std::cerr << "Failed to Get Desc for adapter " << i \
-                    << "with err code: " << std::hex << hr << std::endl;
-                return RetCode::WinApiInvokeFailed;
-            }
-
-            if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) {
-                // skip software render driver
-                break;
-            }
-
-            buf[i] = GpuDesc {
-                // init name later.
-                .dedicated_gpu_memory = desc.DedicatedVideoMemory,
-                .dedicated_system_memory = desc.DedicatedSystemMemory,
-                .shared_system_memory = desc.SharedSystemMemory,
-                .current_gpu_memory_usage = 0,
-            };
-            wcscpy_s(buf[i].name, 128, desc.Description);
-            *gpu_count = i + 1;
-        }
-
-        pFactory->Release();
-        return RetCode::Success;
-    }
-}
diff --git a/src/native/test/test.py b/src/native/test/test.py
deleted file mode 100644
index eb6ec95..0000000
--- a/src/native/test/test.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import ctypes as ct
-from enum import IntEnum
-
-
-class RetCode(IntEnum):
-    Success = 0
-    WinApiInvokeFailed = 1
-
-class GpuDesc(ct.Structure):
-    _fields_ = [
-        ("name", ct.c_wchar * 128),
-
-        ("dedicated_gpu_memory", ct.c_size_t),
-        ("dedicated_system_memory", ct.c_size_t),
-        ("shared_system_memory", ct.c_size_t),
-
-        ("current_gpu_memory_usage", ct.c_int64),
-    ]
-
-native = ct.CDLL(r".\build\windows\x64\release\native.dll")
-
-get_all_gpus = native.get_all_gpus
-get_all_gpus.restype = ct.c_int # enum treated as int
-get_all_gpus.argtypes = (
-    ct.POINTER(GpuDesc), # IN buf
-    ct.c_size_t, # IN max_count
-    ct.POINTER(ct.c_size_t), # OUT gpu_count
-)
-
-gpu_descs = (GpuDesc * 255)()
-gpu_count = ct.c_size_t()
-ret = get_all_gpus(gpu_descs, 255, ct.pointer(gpu_count))
-
-print("total adapters: ", gpu_count.value)
-for i in range(gpu_count.value):
-    print("-"*80)
-    print("name: ", gpu_descs[i].name)
-    print("memory: ", gpu_descs[i].dedicated_gpu_memory)
diff --git a/src/native/xmake.lua b/src/native/xmake.lua
deleted file mode 100644
index 2e5be39..0000000
--- a/src/native/xmake.lua
+++ /dev/null
@@ -1,10 +0,0 @@
-add_rules("mode.debug", "mode.release")
-add_rules("plugin.compile_commands.autoupdate")
-
-
-target("native")
-    is_plat("windows")
-    set_kind("shared")
-    set_languages("c++20")
-    add_files("src/*.cpp")
-    add_links("dxgi")
diff --git a/src/sakura.py b/src/sakura.py
index 0428ca3..13475e1 100644
--- a/src/sakura.py
+++ b/src/sakura.py
@@ -1,19 +1,16 @@
+import os
+import json
 import logging
-from typing import List, Dict, Optional
-from dataclasses import dataclass
-from pydantic import BaseModel
-
+from typing import Dict, Optional
 from hashlib import sha256
+import requests
 
-import asyncio
-
-from .utils.download import parallel_download
 from .utils.model_size_cauculator import ModelCalculator, ModelConfig
 
-logger = logging.getLogger(__name__)
+SAKURA_DATA_FILE = "data/model_list.json"
 
 
-class Sakura(BaseModel):
+class Sakura:
     """Sakura 模型基础信息"""
 
     repo: str
@@ -27,6 +24,32 @@
     bpw: float # bytes per weight
     config_cache: Optional[Dict] = None # 模型配置缓存
 
+    def __init__(
+        self,
+        repo,
+        filename,
+        sha256,
+        size,
+        minimal_gpu_memory_gib,
+        recommended_np,
+        base_model_hf,
+        bpw,
+        config_cache,
+    ):
+        self.repo = repo
+        self.filename = filename
+        self.sha256 = sha256
+        self.size = size
+        self.minimal_gpu_memory_gib = minimal_gpu_memory_gib
+        self.recommended_np = recommended_np
+        self.base_model_hf = base_model_hf
+        self.bpw = bpw
+        self.config_cache = config_cache
+        self.download_links = {
+            "HFMirror": f"https://hf-mirror.com/SakuraLLM/{repo}/resolve/main/{filename}",
+            "HuggingFace": f"https://huggingface.co/SakuraLLM/{repo}/resolve/main/{filename}",
+        }
+
     def to_model_config(self, context: int = 8192) -> ModelConfig:
         """转换为 ModelCalculator 可用的配置"""
         return ModelConfig(
@@ -53,9 +76,7 @@ class SakuraCalculator:
     def __init__(self, sakura: Sakura):
         self.sakura = sakura
 
-    def calculate_memory_requirements(
-        self, context_length: int
-    ) -> Dict[str, float]:
+    def calculate_memory_requirements(self, context_length: int) -> Dict[str, float]:
         """计算指定配置下的内存需求"""
         config = self.sakura.to_model_config(context_length)
         calculator = ModelCalculator(config)
@@ -87,187 +108,56 @@ def recommend_config(self, available_memory_gib: float) -> Dict[str, int]:
         return best_config
 
 
-def _sakura(
-    repo,
-    filename,
-    sha256,
-    size,
-    minimal_gpu_memory_gib,
-    recommended_np,
-    base_model_hf,
-    bpw,
-    config_cache,
-):
-    return Sakura(
-        repo=repo,
-        filename=filename,
-        sha256=sha256,
-        size=size,
-        minimal_gpu_memory_gib=minimal_gpu_memory_gib,
-        recommended_np=recommended_np,
-        base_model_hf=base_model_hf,
-        bpw=bpw,
-        config_cache=config_cache,
-        download_links={
-            "HFMirror": f"https://hf-mirror.com/SakuraLLM/{repo}/resolve/main/{filename}",
-            "HuggingFace": f"https://huggingface.co/SakuraLLM/{repo}/resolve/main/{filename}",
-        },
-    )
-
-
 SAKURA_DOWNLOAD_SRC = [
     "HFMirror",
     "HuggingFace",
 ]
 
 
-class ModelList(BaseModel):
-    created_at: int
-    models: List[Sakura]
-
-
 class sakura_list_init:
-    SAKURA_DEFAULT_LIST = [
-        _sakura(
-            repo="GalTransl-7B-v2.6",
-            filename="GalTransl-7B-v2.6-IQ4_XS.gguf",
-            sha256="f1095c715bd37d6df1f674e86382723fe1fe45c3b4f9c80a4452bcf9128d3eca",
-            minimal_gpu_memory_gib=8,
-            size=4.29,
-            recommended_np={"8": 2, "10": 4, "12": 12, "16": 16, "24": 16},
-            base_model_hf="Qwen/Qwen1.5-7B",
-            bpw=4.25,
-            config_cache={
-                "hidden_size": 4096,
-                "num_attention_heads": 32,
-                "num_key_value_heads": 32,
-                "num_hidden_layers": 32,
-                "parameters": 7721324544.0,
-            },
-        ),
-        _sakura(
-            repo="Sakura-14B-Qwen2.5-v1.0-GGUF",
-            filename="sakura-14b-qwen2.5-v1.0-iq4xs.gguf",
-            sha256="34af88f99c113418d0665d3ceede767c9a12040c9e7c4bb5e87cdb1b1e06e94a",
-            minimal_gpu_memory_gib=10,
-            size=8.19,
-            recommended_np={"10": 4, "12": 12, "16": 16, "24": 16},
-            base_model_hf="Qwen/Qwen2.5-14B",
-            bpw=4.25,
-            config_cache={
-                "hidden_size": 5120,
-                "num_attention_heads": 40,
-                "num_key_value_heads": 8,
-                "num_hidden_layers": 48,
-                "parameters": 14770033664.0,
-            },
-        ),
-        _sakura(
-            repo="Sakura-14B-Qwen2.5-v1.0-GGUF",
-            filename="sakura-14b-qwen2.5-v1.0-q4km.gguf",
-            sha256="c87697cd9c7898464426cb7a1ec5e220755affaa08096766e8d20de1853c2063",
-            minimal_gpu_memory_gib=10,
-            size=8.99,
-            recommended_np={"10": 1, "12": 6, "16": 16, "24": 16},
-            base_model_hf="Qwen/Qwen2.5-14B",
-            bpw=4.85,
-            config_cache={
-                "hidden_size": 5120,
-                "num_attention_heads": 40,
-                "num_key_value_heads": 8,
-                "num_hidden_layers": 48,
-                "parameters": 14770033664.0,
-            },
-        ),
-        _sakura(
-            repo="Sakura-14B-Qwen2beta-v0.9.2-GGUF",
-            filename="sakura-14b-qwen2beta-v0.9.2-iq4xs.gguf",
-            sha256="254a7e97e5e2a5daa371145e55bb2b0a0a789615dab2d4316189ba089a3ced67",
-            minimal_gpu_memory_gib=12,
-            size=7.91,
-            recommended_np={"12": 1, "16": 6, "24": 8},
-            base_model_hf="Qwen/Qwen1.5-14B",
-            bpw=4.25,
-            config_cache={
-                "hidden_size": 5120,
-                "num_attention_heads": 40,
-                "num_key_value_heads": 40,
-                "num_hidden_layers": 40,
-                "parameters": 14167290880.0,
-            },
-        ),
-        _sakura(
-            repo="Sakura-14B-Qwen2beta-v0.9.2-GGUF",
-            filename="sakura-14b-qwen2beta-v0.9.2-q4km.gguf",
-            sha256="8bae1ae35b7327fa7c3a8f3ae495b81a071847d560837de2025e1554364001a5",
-            minimal_gpu_memory_gib=12,
-            size=9.19,
-            recommended_np={"12": 1, "16": 6, "24": 8},
-            base_model_hf="Qwen/Qwen1.5-14B",
-            bpw=4.85,
-            config_cache={
-                "hidden_size": 5120,
-                "num_attention_heads": 40,
-                "num_key_value_heads": 40,
-                "num_hidden_layers": 40,
-                "parameters": 14167290880.0,
-            },
-        ),
-    ]
-
     def __init__(self):
-        username = "PiDanShouRouZhouXD"
-
-        self.update_file_mirror_list = [
-            # Mirror
-            f"https://gh-proxy.com/https://raw.githubusercontent.com/{username}/Sakura_Launcher_GUI/refs/heads/main/data/model_list.json",
-            f"https://ghp.ci/https://raw.githubusercontent.com/{username}/Sakura_Launcher_GUI/refs/heads/main/data/model_list.json",
-            # JsDelivr CDN
-            f"https://cdn.jsdelivr.net/gh/{username}/Sakura_Launcher_GUI@main/data/model_list.json",
-            # rawgit CDN, but not recommended
-            f"https://cdn.rawgit.com/{username}/Sakura_Launcher_GUI/refs/heads/main/data/model_list.json",
-            # Direct access
-            f"https://raw.githubusercontent.com/{username}/Sakura_Launcher_GUI/refs/heads/main/data/model_list.json",
-        ]
-
-        # FIXME(kuriko): This will add delay (max to 3s) in startup,
-        #   we should split the model_list load schema in section_download.py
-        self.SAKURA_LIST: Dict[str, Sakura] = asyncio.run(
-            self.fetch_latest_model_list()
-        )
-
-    async def fetch_latest_model_list(self) -> List[Sakura]:
-        ret_model_list: List[Sakura] = []
+        self._load_from_local()
         try:
-            model_list: ModelList = await parallel_download(
-                self.update_file_mirror_list,
-                json=True,
-                parser=lambda data: ModelList(**data),
-                timeout=3,
+            self._load_from_remote()
+        except:
+            pass
+
+    def _load_from_local(self):
+        with open(os.path.join(SAKURA_DATA_FILE), "r", encoding="utf-8") as f:
+            raw_json = json.load(f)
+        self._update_sakura_list(raw_json)
+
+    def _load_from_remote(self):
+        raw_json = requests.get(
+            f"https://ghp.ci/https://raw.githubusercontent.com/PiDanShouRouZhouXD/Sakura_Launcher_GUI/refs/heads/main/data/model_list.json"
+        ).json()
+        self._update_sakura_list(raw_json)
+
+    def _update_sakura_list(self, raw_json):
+        sakura_list = []
+        for obj in raw_json["models"]:
+            sakura = Sakura(
+                repo=obj["repo"],
+                filename=obj["filename"],
+                sha256=obj["sha256"],
+                minimal_gpu_memory_gib=obj["minimal_gpu_memory_gib"],
+                size=obj["size"],
+                recommended_np=obj["recommended_np"],
+                base_model_hf=obj["base_model_hf"],
+                bpw=obj["bpw"],
+                config_cache=obj["config_cache"],
             )
-
-            print(f"当前模型列表:{model_list}")
-
-            for model in model_list.models:
-                # FIXME(kuriko): move download links to model_list.json rather than hard coded.
-                ret_model_list.append(
-                    _sakura(**model.model_dump(exclude={"download_links"}))
-                )
-
-        except Exception as e:
-            logger.error("无法获取模型列表, 回退到内置默认模型列表")
-            ret_model_list = self.SAKURA_DEFAULT_LIST
-
-        return ret_model_list
+            sakura_list.append(sakura)
+        self.sakura_list = sakura_list
 
     def __getitem__(self, name) -> Sakura:
-        for model in self.SAKURA_LIST:
+        for model in self.sakura_list:
             if model.filename == name:
                 return model
         return None
 
     def __iter__(self):
-        for item in self.SAKURA_LIST:
+        for item in self.sakura_list:
             yield item
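
Note on the new loader above: `_update_sakura_list` reads `raw_json["models"]` and passes each entry's fields directly to `Sakura(...)`, so `data/model_list.json` must carry exactly those keys. Below is a minimal sketch of the expected shape, with field values copied from the removed `SAKURA_DEFAULT_LIST` entry for GalTransl-7B-v2.6; the top-level `created_at` key follows the removed `ModelList` schema and is an assumption here, since the new loader never reads it.

```python
# Sketch of the data/model_list.json structure that _update_sakura_list expects.
# Values are copied from the removed SAKURA_DEFAULT_LIST entry; "created_at" follows
# the removed ModelList schema and is ignored by the new loader (assumption).
import json

model_list = {
    "created_at": 0,
    "models": [
        {
            "repo": "GalTransl-7B-v2.6",
            "filename": "GalTransl-7B-v2.6-IQ4_XS.gguf",
            "sha256": "f1095c715bd37d6df1f674e86382723fe1fe45c3b4f9c80a4452bcf9128d3eca",
            "minimal_gpu_memory_gib": 8,
            "size": 4.29,
            "recommended_np": {"8": 2, "10": 4, "12": 12, "16": 16, "24": 16},
            "base_model_hf": "Qwen/Qwen1.5-7B",
            "bpw": 4.25,
            "config_cache": {
                "hidden_size": 4096,
                "num_attention_heads": 32,
                "num_key_value_heads": 32,
                "num_hidden_layers": 32,
                "parameters": 7721324544.0,
            },
        }
    ],
}

# Write the sample file where _load_from_local will look for it.
with open("data/model_list.json", "w", encoding="utf-8") as f:
    json.dump(model_list, f, ensure_ascii=False, indent=2)
```
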
diff --git a/src/utils/download.py b/src/utils/download.py
deleted file mode 100644
index 1dc83c0..0000000
--- a/src/utils/download.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import aiohttp
-from typing import List, Any, Callable
-import logging
-import asyncio
-
-
-logger = logging.getLogger(__name__)
-
-
-async def get(url: str, session: aiohttp.ClientSession, json: bool, timeout=0, parser: None|Callable=None) -> Any:
-    try:
-        async with session.get(url=url, timeout=timeout) as response:
-            if json:
-                resp = await response.json(content_type=None)
-            else:
-                resp = await response.text()
-
-            if not parser: return resp
-
-            if (ret := parser(resp)) and ret:
-                return ret
-            else:
-                raise ValueError("return data parsed failed")
-    except ValueError as e:
-        logger.error(f"fetch {url} with error: mismatch config schema")
-    except Exception as e:
-        logger.error(f"fetch {url} with error: network issue")
-        raise e
-
-async def parallel_download(urls: List[str], json=False, timeout=0, parser: None|Callable=None) -> Any:
-    ''' Get first result from multiple URLs concurrently. '''
-    try:
-        ret = None
-        async with aiohttp.ClientSession() as session:
-            tasks = [asyncio.create_task(get(url, session, json, timeout, parser)) for url in urls]
-
-            done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
-            while len(pending) > 0:
-                done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
-                for task in done:
-                    try:
-                        ret = task.result()
-                    except Exception as e:
-                        continue
-
-                if ret:
-                    for task in pending:
-                        task.cancel()
-                    break
-
-        return ret
-    except Exception as e:
-        raise e
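
The removed `parallel_download` raced every mirror in `update_file_mirror_list` and kept the first successful response (with a 3 s timeout), whereas the new `_load_from_remote` blocks on a single hard-coded ghp.ci mirror with no timeout. Purely as a sketch and not part of this patch, the same fallback behaviour could be approximated synchronously with `requests`; the mirror URLs come from the removed list and the helper name `fetch_model_list` is illustrative.

```python
# Not part of this patch: a synchronous stand-in for the removed parallel_download,
# trying the same mirrors in order with a timeout instead of racing them concurrently.
import requests

MIRRORS = [
    "https://gh-proxy.com/https://raw.githubusercontent.com/PiDanShouRouZhouXD/Sakura_Launcher_GUI/refs/heads/main/data/model_list.json",
    "https://ghp.ci/https://raw.githubusercontent.com/PiDanShouRouZhouXD/Sakura_Launcher_GUI/refs/heads/main/data/model_list.json",
    "https://cdn.jsdelivr.net/gh/PiDanShouRouZhouXD/Sakura_Launcher_GUI@main/data/model_list.json",
    "https://raw.githubusercontent.com/PiDanShouRouZhouXD/Sakura_Launcher_GUI/refs/heads/main/data/model_list.json",
]

def fetch_model_list(timeout: float = 3.0) -> dict:
    """Return the first mirror's JSON payload; raise if every mirror fails."""
    last_error = None
    for url in MIRRORS:
        try:
            resp = requests.get(url, timeout=timeout)
            resp.raise_for_status()
            return resp.json()
        except Exception as e:  # network error, bad status, or invalid JSON
            last_error = e
    raise RuntimeError(f"all mirrors failed: {last_error}")
```
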
diff --git a/src/utils/generate_default_list.py b/src/utils/generate_default_list.py
deleted file mode 100644
index 55cab50..0000000
--- a/src/utils/generate_default_list.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import json
-from pathlib import Path
-import black
-from typing import List, Dict, Any
-
-def load_model_list() -> Dict[str, Any]:
-    """加载model_list.json文件"""
-    json_path = Path(__file__).parent.parent.parent / "data" / "model_list.json"
-    with open(json_path, "r", encoding="utf-8") as f:
-        return json.load(f)
-
-def generate_sakura_list_code(models: List[Dict[str, Any]]) -> str:
-    """生成SAKURA_DEFAULT_LIST的代码"""
-    items = []
-    for model in models:
-        # 构建_sakura函数的参数
-        params = [
-            f'repo="{model["repo"]}"',
-            f'filename="{model["filename"]}"',
-            f'sha256="{model["sha256"]}"',
-            f'minimal_gpu_memory_gib={model["minimal_gpu_memory_gib"]}',
-            f'size={model["size"]}',
-            f'recommended_np={model["recommended_np"]}',
-            f'base_model_hf="{model["base_model_hf"]}"',
-            f'bpw={model["bpw"]}',
-            f'config_cache={model["config_cache"]}'
-        ]
-
-        # 使用join而不是f-string来处理多行字符串
-        param_str = ',\n            '.join(params)
-        item = '        _sakura(\n            ' + param_str + '\n        )'
-        items.append(item)
-
-    # 同样使用join来处理多行字符串
-    items_str = ',\n'.join(items)
-    code = '    SAKURA_DEFAULT_LIST = [\n' + items_str + '\n    ]'
-
-    return code
-
-def update_sakura_file():
-    """更新src/sakura.py文件中的SAKURA_DEFAULT_LIST"""
-    # 加载model_list.json
-    model_list = load_model_list()
-
-    # 生成新的代码
-    new_code = generate_sakura_list_code(model_list["models"])
-
-    # 读取现有的sakura.py文件
-    sakura_path = Path(__file__).parent.parent / "sakura.py"
-    with open(sakura_path, "r", encoding="utf-8") as f:
-        content = f.read()
-
-    # 查找并替换SAKURA_DEFAULT_LIST部分
-    start_marker = "class sakura_list_init:"
-    list_start = content.find("    SAKURA_DEFAULT_LIST = [", content.find(start_marker))
-    list_end = content.find("\n\n", list_start)
-
-    # 替换内容
-    new_content = content[:list_start] + new_code + content[list_end:]
-
-    # 使用black格式化代码
-    new_content = black.format_file_contents(
-        new_content,
-        fast=False,
-        mode=black.FileMode()
-    )
-
-    # 写回文件
-    with open(sakura_path, "w", encoding="utf-8") as f:
-        f.write(new_content)
-
-if __name__ == "__main__":
-    update_sakura_file()
diff --git a/src/utils/linux.py b/src/utils/linux.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/utils/native.py b/src/utils/native.py
deleted file mode 100644
index aa3c5b4..0000000
--- a/src/utils/native.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import logging
-from typing import List, TYPE_CHECKING
-import ctypes as ct
-
-if TYPE_CHECKING:
-    from ..gpu import GPUInfo
-
-logger = logging.getLogger(__name__)
-
-class GPUDescFFI(ct.Structure):
-    _fields_ = [
-        ("name", ct.c_wchar * 128),
-        ("dedicated_gpu_memory", ct.c_size_t),
-        ("dedicated_system_memory", ct.c_size_t),
-        ("shared_system_memory", ct.c_size_t),
-        ("current_gpu_memory_usage", ct.c_int64),
-    ]
-
-class NativeDll:
-    def __init__(self):
-        self.native = native = ct.CDLL(r"./native.dll")
-        get_all_gpus = native.get_all_gpus
-        get_all_gpus.restype = ct.c_uint # enum treated as int
-        get_all_gpus.argtypes = (
-            ct.POINTER(GPUDescFFI), # IN buf
-            ct.c_size_t, # IN max_count
-            ct.POINTER(ct.c_size_t), # OUT gpu_count
-        )
-
-    def get_gpus(self) -> List[GPUInfo]:
-        get_all_gpus = self.native.get_all_gpus
-        gpu_descs = (GPUDescFFI * 255)()
-        gpu_count = ct.c_size_t()
-        retcode = get_all_gpus(gpu_descs, 255, ct.pointer(gpu_count))
-        if retcode != 0:
-            raise RuntimeError(f"Failed to get all gpus with error code: {retcode}")
-
-        ret = []
-        for i in range(int(gpu_count.value)):
-            gpu_info = GPUInfo(
-                name=gpu_descs[i].name,
-                gpu_type=self.get_gpu_type(gpu_descs[i].name),
-                dedicated_gpu_memory=gpu_descs[i].dedicated_gpu_memory,
-                dedicated_system_memory=gpu_descs[i].dedicated_system_memory,
-                shared_system_memory=gpu_descs[i].shared_system_memory,
-                current_gpu_memory_usage=gpu_descs[i].current_gpu_memory_usage,
-                index=i,
-                ability=None
-            )
-            if gpu_info.name not in self.gpu_info_map: # 使用正确的属性名
-                self.gpu_info_map[gpu_info.name] = gpu_info
-                logging.info(f"检测到 GPU: {gpu_info}")
-                ret.append(gpu_info)
-
-        return ret
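
One packaging note tied to the `main.spec` change at the top of this patch: `('data', '.')` bundles the `data` directory into the distribution, but a one-file PyInstaller build unpacks bundled files under `sys._MEIPASS` rather than the working directory, so the relative `SAKURA_DATA_FILE` path only resolves when the process is started from the right location. A hedged sketch of a resolver follows; it is not part of this patch, and the helper name `resource_path` is illustrative.

```python
# Illustrative helper (not part of this patch): resolve data/model_list.json both when
# running from source and when frozen by PyInstaller, which unpacks one-file bundles
# into the temporary directory exposed as sys._MEIPASS.
import os
import sys

def resource_path(relative: str) -> str:
    base = getattr(sys, "_MEIPASS", os.path.abspath("."))
    return os.path.join(base, relative)

# Example: open the bundled model list regardless of how the app was started.
with open(resource_path("data/model_list.json"), "r", encoding="utf-8") as f:
    data = f.read()
```
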