Skip to content

Commit

Permalink
feat(core): add GGUF splitting feature
Browse files Browse the repository at this point in the history
- add ability to split GGUFs
  • Loading branch information
leafspark committed Sep 10, 2024
1 parent cee4294 commit e5e18f9
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 0 deletions.
117 changes: 117 additions & 0 deletions src/AutoGGUF.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,12 +211,75 @@ def __init__(self, args: List[str]) -> None:
self.fp8_layout.addWidget(quantize_button)
self.fp8_dialog.setLayout(self.fp8_layout)

# Split GGUF Window
self.split_gguf_dialog = QDialog(self)
self.split_gguf_dialog.setWindowTitle(SPLIT_GGUF)
self.split_gguf_dialog.setFixedWidth(500)
self.split_gguf_layout = QVBoxLayout()

# Input path
input_layout = QHBoxLayout()
self.split_gguf_input = QLineEdit()
input_button = QPushButton(BROWSE)
input_button.clicked.connect(
lambda: self.split_gguf_input.setText(
QFileDialog.getExistingDirectory(self, OPEN_MODEL_FOLDER)
)
)
input_layout.addWidget(QLabel(INPUT_MODEL))
input_layout.addWidget(self.split_gguf_input)
input_layout.addWidget(input_button)
self.split_gguf_layout.addLayout(input_layout)

# Output path
output_layout = QHBoxLayout()
self.split_gguf_output = QLineEdit()
output_button = QPushButton(BROWSE)
output_button.clicked.connect(
lambda: self.split_gguf_output.setText(
QFileDialog.getExistingDirectory(self, OPEN_MODEL_FOLDER)
)
)
output_layout.addWidget(QLabel(OUTPUT))
output_layout.addWidget(self.split_gguf_output)
output_layout.addWidget(output_button)
self.split_gguf_layout.addLayout(output_layout)

# Split options
split_options_layout = QHBoxLayout()
self.split_max_size = QLineEdit()
self.split_max_size.setPlaceholderText("Size in G/M")
self.split_max_tensors = QLineEdit()
self.split_max_tensors.setPlaceholderText("Number of tensors")
split_options_layout.addWidget(QLabel(SPLIT_MAX_SIZE))
split_options_layout.addWidget(self.split_max_size)
split_options_layout.addWidget(QLabel(SPLIT_MAX_TENSORS))
split_options_layout.addWidget(self.split_max_tensors)
self.split_gguf_layout.addLayout(split_options_layout)

# Split button
split_button = QPushButton(SPLIT_GGUF)
split_button.clicked.connect(
lambda: self.split_gguf(
self.split_gguf_input.text(),
self.split_gguf_output.text(),
self.split_max_size.text(),
self.split_max_tensors.text(),
)
)
self.split_gguf_layout.addWidget(split_button)
self.split_gguf_dialog.setLayout(self.split_gguf_layout)

# Tools menu
tools_menu = self.menubar.addMenu("&Tools")
autofp8_action = QAction("&AutoFP8", self)
autofp8_action.setShortcut(QKeySequence("Shift+Q"))
autofp8_action.triggered.connect(self.fp8_dialog.exec)
split_gguf_action = QAction("&Split GGUF", self)
split_gguf_action.setShortcut(QKeySequence("Shift+G"))
split_gguf_action.triggered.connect(self.split_gguf_dialog.exec)
tools_menu.addAction(autofp8_action)
tools_menu.addAction(split_gguf_action)

# Content widget
content_widget = QWidget()
Expand Down Expand Up @@ -1246,6 +1309,60 @@ def download_error(self, error_message) -> None:
if os.path.exists(partial_file):
os.remove(partial_file)

def split_gguf(
    self, model_dir: str, output_dir: str, max_size: str, max_tensors: str
) -> None:
    """Start a background ``llama-gguf-split`` run and list it as a task.

    Builds the ``llama-gguf-split`` command line, launches it in a
    ``QuantizationThread`` and adds a ``TaskListItem`` for it to
    ``self.task_list``. Failures while setting the task up are reported
    through ``show_error`` instead of being raised.

    Args:
        model_dir: Input path, passed to ``llama-gguf-split`` as its first
            positional argument.
        output_dir: Output path, passed as the second positional argument.
        max_size: Value for ``--split-max-size``; the flag is omitted when
            this is empty.
        max_tensors: Value for ``--split-max-tensors``; the flag is omitted
            when this is empty.
    """
    # Both paths are required; bail out before touching the task list.
    if not model_dir or not output_dir:
        show_error(self.logger, f"{SPLIT_GGUF_ERROR}: {NO_MODEL_SELECTED}")
        return

    self.logger.info(SPLIT_GGUF_TASK_STARTED)
    try:
        command = ["llama-gguf-split"]

        # Optional limits go before the positional input/output arguments.
        if max_size:
            command.extend(["--split-max-size", max_size])
        if max_tensors:
            command.extend(["--split-max-tensors", max_tensors])

        command.extend([model_dir, output_dir])

        logs_path = self.logs_input.text()
        ensure_directory(logs_path)

        # One log file per run, named by start time.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(logs_path, f"gguf_split_{timestamp}.log")

        thread = QuantizationThread(command, os.getcwd(), log_file)
        self.quant_threads.append(thread)

        task_name = SPLIT_GGUF_DYNAMIC.format(os.path.basename(model_dir))
        task_item = TaskListItem(
            task_name,
            log_file,
            show_progress_bar=False,
            logger=self.logger,
            quant_threads=self.quant_threads,
        )
        list_item = QListWidgetItem(self.task_list)
        list_item.setSizeHint(task_item.sizeHint())
        self.task_list.addItem(list_item)
        self.task_list.setItemWidget(list_item, task_item)

        thread.status_signal.connect(task_item.update_status)
        thread.finished_signal.connect(
            lambda: self.task_finished(thread, task_item)
        )
        thread.error_signal.connect(
            lambda err: handle_error(self.logger, err, task_item)
        )
        thread.start()

    except Exception as e:
        # SPLIT_GGUF_ERROR has no "{}" placeholder, so calling .format(e) on
        # it would silently drop the exception text; append it explicitly,
        # matching the message style used for the missing-path error above.
        show_error(self.logger, f"{SPLIT_GGUF_ERROR}: {e}")

    # NOTE(review): this is logged as soon as the thread has been started
    # (or setup failed), not when the split itself completes — confirm
    # whether it should move to the finished_signal handler.
    self.logger.info(SPLIT_GGUF_TASK_FINISHED)

def verify_gguf(self, file_path) -> bool:
try:
with open(file_path, "rb") as f:
Expand Down
9 changes: 9 additions & 0 deletions src/Localizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,15 @@ def __init__(self):
"HuggingFace to GGUF conversion task started"
)

# Split GGUF
self.SPLIT_GGUF = "Split GGUF"
self.SPLIT_MAX_SIZE = "Split Max Size"
self.SPLIT_MAX_TENSORS = "Split Max Tensors"
self.SPLIT_GGUF_TASK_STARTED = "GGUF Split task started"
self.SPLIT_GGUF_TASK_FINISHED = "GGUF Split task finished"
self.SPLIT_GGUF_COMMAND = "GGUF Split Command"
self.SPLIT_GGUF_ERROR = "Error starting GGUF split"


class _French(_Localization):
def __init__(self):
Expand Down

0 comments on commit e5e18f9

Please sign in to comment.