From e5e18f9966b8002ab3b6b2e442bb7aaeb992c97f Mon Sep 17 00:00:00 2001
From: BuildTools
Date: Mon, 9 Sep 2024 20:46:53 -0700
Subject: [PATCH] feat(core): add GGUF splitting feature

- add ability to split GGUFs
---
 src/AutoGGUF.py      | 128 +++++++++++++++++++++++++++++++++++++++++++
 src/Localizations.py |  10 ++++
 2 files changed, 138 insertions(+)

diff --git a/src/AutoGGUF.py b/src/AutoGGUF.py
index e2ad663..b934d6c 100644
--- a/src/AutoGGUF.py
+++ b/src/AutoGGUF.py
@@ -211,12 +211,75 @@ def __init__(self, args: List[str]) -> None:
         self.fp8_layout.addWidget(quantize_button)
         self.fp8_dialog.setLayout(self.fp8_layout)
 
+        # Split GGUF Window
+        self.split_gguf_dialog = QDialog(self)
+        self.split_gguf_dialog.setWindowTitle(SPLIT_GGUF)
+        self.split_gguf_dialog.setFixedWidth(500)
+        self.split_gguf_layout = QVBoxLayout()
+
+        # Input path
+        input_layout = QHBoxLayout()
+        self.split_gguf_input = QLineEdit()
+        input_button = QPushButton(BROWSE)
+        input_button.clicked.connect(
+            lambda: self.split_gguf_input.setText(
+                QFileDialog.getExistingDirectory(self, OPEN_MODEL_FOLDER)
+            )
+        )
+        input_layout.addWidget(QLabel(INPUT_MODEL))
+        input_layout.addWidget(self.split_gguf_input)
+        input_layout.addWidget(input_button)
+        self.split_gguf_layout.addLayout(input_layout)
+
+        # Output path
+        output_layout = QHBoxLayout()
+        self.split_gguf_output = QLineEdit()
+        output_button = QPushButton(BROWSE)
+        output_button.clicked.connect(
+            lambda: self.split_gguf_output.setText(
+                QFileDialog.getExistingDirectory(self, OPEN_MODEL_FOLDER)
+            )
+        )
+        output_layout.addWidget(QLabel(OUTPUT))
+        output_layout.addWidget(self.split_gguf_output)
+        output_layout.addWidget(output_button)
+        self.split_gguf_layout.addLayout(output_layout)
+
+        # Split options
+        split_options_layout = QHBoxLayout()
+        self.split_max_size = QLineEdit()
+        self.split_max_size.setPlaceholderText("Size in G/M")
+        self.split_max_tensors = QLineEdit()
+        self.split_max_tensors.setPlaceholderText("Number of tensors")
+        split_options_layout.addWidget(QLabel(SPLIT_MAX_SIZE))
+        split_options_layout.addWidget(self.split_max_size)
+        split_options_layout.addWidget(QLabel(SPLIT_MAX_TENSORS))
+        split_options_layout.addWidget(self.split_max_tensors)
+        self.split_gguf_layout.addLayout(split_options_layout)
+
+        # Split button
+        split_button = QPushButton(SPLIT_GGUF)
+        split_button.clicked.connect(
+            lambda: self.split_gguf(
+                self.split_gguf_input.text(),
+                self.split_gguf_output.text(),
+                self.split_max_size.text(),
+                self.split_max_tensors.text(),
+            )
+        )
+        self.split_gguf_layout.addWidget(split_button)
+        self.split_gguf_dialog.setLayout(self.split_gguf_layout)
+
         # Tools menu
         tools_menu = self.menubar.addMenu("&Tools")
         autofp8_action = QAction("&AutoFP8", self)
         autofp8_action.setShortcut(QKeySequence("Shift+Q"))
         autofp8_action.triggered.connect(self.fp8_dialog.exec)
+        split_gguf_action = QAction("&Split GGUF", self)
+        split_gguf_action.setShortcut(QKeySequence("Shift+G"))
+        split_gguf_action.triggered.connect(self.split_gguf_dialog.exec)
         tools_menu.addAction(autofp8_action)
+        tools_menu.addAction(split_gguf_action)
 
         # Content widget
         content_widget = QWidget()
@@ -1246,6 +1309,71 @@ def download_error(self, error_message) -> None:
             if os.path.exists(partial_file):
                 os.remove(partial_file)
 
+    def split_gguf(
+        self, model_dir: str, output_dir: str, max_size: str, max_tensors: str
+    ) -> None:
+        """Launch llama-gguf-split in a worker thread and track it as a task.
+
+        Args:
+            model_dir: Input path passed to llama-gguf-split.
+            output_dir: Output path passed to llama-gguf-split.
+            max_size: Value for --split-max-size; skipped when empty.
+            max_tensors: Value for --split-max-tensors; skipped when empty.
+        """
+        if not model_dir or not output_dir:
+            show_error(self.logger, f"{SPLIT_GGUF_ERROR}: {NO_MODEL_SELECTED}")
+            return
+        self.logger.info(SPLIT_GGUF_TASK_STARTED)
+        try:
+            command = [
+                "llama-gguf-split",
+            ]
+
+            if max_size:
+                command.extend(["--split-max-size", max_size])
+            if max_tensors:
+                command.extend(["--split-max-tensors", max_tensors])
+
+            command.extend([model_dir, output_dir])
+
+            logs_path = self.logs_input.text()
+            ensure_directory(logs_path)
+
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            log_file = os.path.join(logs_path, f"gguf_split_{timestamp}.log")
+
+            thread = QuantizationThread(command, os.getcwd(), log_file)
+            self.quant_threads.append(thread)
+
+            task_name = SPLIT_GGUF_DYNAMIC.format(os.path.basename(model_dir))
+            task_item = TaskListItem(
+                task_name,
+                log_file,
+                show_progress_bar=False,
+                logger=self.logger,
+                quant_threads=self.quant_threads,
+            )
+            list_item = QListWidgetItem(self.task_list)
+            list_item.setSizeHint(task_item.sizeHint())
+            self.task_list.addItem(list_item)
+            self.task_list.setItemWidget(list_item, task_item)
+
+            def on_finished() -> None:
+                # Log completion when the worker ends, not at launch.
+                self.task_finished(thread, task_item)
+                self.logger.info(SPLIT_GGUF_TASK_FINISHED)
+
+            thread.status_signal.connect(task_item.update_status)
+            thread.finished_signal.connect(on_finished)
+            thread.error_signal.connect(
+                lambda err: handle_error(self.logger, err, task_item)
+            )
+            thread.start()
+
+        except Exception as e:
+            # SPLIT_GGUF_ERROR has no placeholder; append the exception text.
+            show_error(self.logger, f"{SPLIT_GGUF_ERROR}: {e}")
+
     def verify_gguf(self, file_path) -> bool:
         try:
             with open(file_path, "rb") as f:
diff --git a/src/Localizations.py b/src/Localizations.py
index 8b1d6f5..576b75a 100644
--- a/src/Localizations.py
+++ b/src/Localizations.py
@@ -396,6 +396,16 @@ def __init__(self):
             "HuggingFace to GGUF conversion task started"
         )
 
+        # Split GGUF
+        self.SPLIT_GGUF = "Split GGUF"
+        self.SPLIT_MAX_SIZE = "Split Max Size"
+        self.SPLIT_MAX_TENSORS = "Split Max Tensors"
+        self.SPLIT_GGUF_TASK_STARTED = "GGUF Split task started"
+        self.SPLIT_GGUF_TASK_FINISHED = "GGUF Split task finished"
+        self.SPLIT_GGUF_COMMAND = "GGUF Split Command"
+        self.SPLIT_GGUF_DYNAMIC = "Splitting GGUF {0}"
+        self.SPLIT_GGUF_ERROR = "Error starting GGUF split"
+
 
 class _French(_Localization):
     def __init__(self):