Merge pull request #6107 from duanjunwen/dev/zero_bubble

[Zerobubble] Merge Main.
hpcaitech · Nov 5, 2024 · 37b23e3 · 37b23e3
2 parents aed20fb + 8e40087
commit 37b23e3
Show file tree

Hide file tree

Showing 60 changed files with 1,685 additions and 834 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -15,21 +15,21 @@ repos:
         args: ["--profile", "black"] # avoid conflict with black
 
   - repo: https://github.com/psf/black-pre-commit-mirror
-    rev: 24.8.0
+    rev: 24.10.0
     hooks:
     - id: black
       name: black formatter
       args: ['--line-length=120', '--target-version=py37', '--target-version=py38', '--target-version=py39','--target-version=py310']
 
   - repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v18.1.8
+    rev: v19.1.2
     hooks:
     - id: clang-format
       name: clang formatter
       types_or: [c++, c]
 
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
       - id: check-yaml
       - id: check-merge-conflict

diff --git a/README.md b/README.md
@@ -25,16 +25,36 @@
 
 </div>
 
+## GPU Cloud HPC-AI.COM Coming!!
+
+For a limited time, you can access an H100 Server for just $1! This is your chance to leverage premium GPU power at an unbeatable price.
+Plus, when you refer a friend, you’ll receive 20% cashback or compute credits equal to 100% of their top-up!
+
+Our platform offers on-demand premium compute, ensuring safe, permanent data storage even after stopping your instance.
+Don’t miss this incredible opportunity to accelerate your AI projects!
+
+Unlock premium GPUs and register now at [HPC-AI.COM](https://hpc-ai.com) to receive $10!
+
+Special Bonuses:
+
+* Top up $1,000 and receive 300 credits
+* Top up $500 and receive 100 credits
+
+<div align="center">
+   <a href="https://youtu.be/ilMQpU71ddI?si=J4JSPzZ03ycYmlki">
+   <img src="https://github.com/hpcaitech/public_assets/blob/main/colossalai/img/HPCAICOM241010.jpg" width="700" />
+   </a>
+</div>
+
+
 ## Latest News
+* [2024/10] [How to build a low-cost Sora-like app? Solutions for you](https://company.hpc-ai.com/blog/how-to-build-a-low-cost-sora-like-app-solutions-for-you)
+* [2024/09] [Singapore Startup HPC-AI Tech Secures 50 Million USD in Series A Funding to Build the Video Generation AI Model and GPU Platform](https://company.hpc-ai.com/blog/singapore-startup-hpc-ai-tech-secures-50-million-usd-in-series-a-funding-to-build-the-video-generation-ai-model-and-gpu-platform)
+* [2024/09] [Reducing AI Large Model Training Costs by 30% Requires Just a Single Line of Code From FP8 Mixed Precision Training Upgrades](https://company.hpc-ai.com/blog/reducing-ai-large-model-training-costs-by-30-requires-just-a-single-line-of-code-from-fp8-mixed-precision-training-upgrades)
 * [2024/06] [Open-Sora Continues Open Source: Generate Any 16-Second 720p HD Video with One Click, Model Weights Ready to Use](https://hpc-ai.com/blog/open-sora-from-hpc-ai-tech-team-continues-open-source-generate-any-16-second-720p-hd-video-with-one-click-model-weights-ready-to-use)
 * [2024/05] [Large AI Models Inference Speed Doubled, Colossal-Inference Open Source Release](https://hpc-ai.com/blog/colossal-inference)
 * [2024/04] [Open-Sora Unveils Major Upgrade: Embracing Open Source with Single-Shot 16-Second Video Generation and 720p Resolution](https://hpc-ai.com/blog/open-soras-comprehensive-upgrade-unveiled-embracing-16-second-video-generation-and-720p-resolution-in-open-source)
 * [2024/04] [Most cost-effective solutions for inference, fine-tuning and pretraining, tailored to LLaMA3 series](https://hpc-ai.com/blog/most-cost-effective-solutions-for-inference-fine-tuning-and-pretraining-tailored-to-llama3-series)
-* [2024/03] [314 Billion Parameter Grok-1 Inference Accelerated by 3.8x, Efficient and Easy-to-Use PyTorch+HuggingFace version is Here](https://hpc-ai.com/blog/314-billion-parameter-grok-1-inference-accelerated-by-3.8x-efficient-and-easy-to-use-pytorchhuggingface-version-is-here)
-* [2024/03] [Open-Sora: Revealing Complete Model Parameters, Training Details, and Everything for Sora-like Video Generation Models](https://hpc-ai.com/blog/open-sora-v1.0)
-* [2024/03] [Open-Sora：Sora Replication Solution with 46% Cost Reduction, Sequence Expansion to Nearly a Million](https://hpc-ai.com/blog/open-sora)
-* [2024/01] [Inference Performance Improved by 46%, Open Source Solution Breaks the Length Limit of LLM for Multi-Round Conversations](https://hpc-ai.com/blog/Colossal-AI-SwiftInfer)
-* [2023/07] [HPC-AI Tech Raises 22 Million USD in Series A Funding](https://www.hpc-ai.tech/blog/hpc-ai-tech-raises-22-million-usd-in-series-a-funding-to-fuel-team-expansion-and-business-growth)
 
 ## Table of Contents
 <ul>

diff --git a/applications/ColossalChat/README.md b/applications/ColossalChat/README.md
@@ -27,11 +27,11 @@
 - [Alternative Option For RLHF: SimPO](#alternative-option-for-rlhf-simple-preference-optimization-simpo)
 - [Alternative Option For RLHF: ORPO](#alternative-option-for-rlhf-odds-ratio-preference-optimization-orpo)
 - [Alternative Option For RLHF: KTO](#alternative-option-for-rlhf-kahneman-tversky-optimization-kto)
+- [O1 Journey](#o1-journey)
+  - [Inference with Self-refined MCTS](#inference-with-self-refined-mcts)
 - [FAQ](#faq)
   - [How to save/load checkpoint](#faq)
   - [How to train with limited resources](#faq)
-- [The Plan](#the-plan)
-  - [Real-time progress](#real-time-progress)
 - [Invitation to open-source contribution](#invitation-to-open-source-contribution)
 - [Quick Preview](#quick-preview)
 - [Authors](#authors)
@@ -272,7 +272,7 @@ Odds Ratio Preference Optimization (ORPO) from this [paper](https://arxiv.org/pd
 ## Alternative Option For RLHF: Kahneman-Tversky Optimization (KTO)
 We support the method introduced in the paper [KTO: Model Alignment as Prospect Theoretic Optimization](https://arxiv.org/pdf/2402.01306) (KTO), an alignment method that directly maximizes the "human utility" of generation results. Read this [README](./examples/README.md) for more information.
 
-### Inference Quantization and Serving - After Training
+## Inference Quantization and Serving - After Training
 
 We provide an online inference server and a benchmark. We aim to run inference on a single GPU, so quantization is essential when using large models.
 
@@ -281,6 +281,21 @@ We support 8-bit quantization (RTN), 4-bit quantization (GPTQ), and FP16 inferen
 Online inference server scripts can help you deploy your own services.
 For more details, see [`inference/`](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat/inference).
 
+## O1 Journey
+### Inference with Self-refined MCTS
+We provide an implementation of the MCT Self-Refine (MCTSr) algorithm, an innovative integration of Large Language Models with Monte Carlo Tree Search.
+To run inference with MCTS, simply use the following script.
+```python
+from coati.reasoner.guided_search.mcts import MCTS
+from coati.reasoner.guided_search.prompt_store.qwen import Qwen32B_prompt_CFG
+
+problem = "How Many R in 'Strawberry'"
+
+search_tree = MCTS(problem=problem, max_simulations=8, cfg=Qwen32B_prompt_CFG)
+answer = search_tree.simulate()
+print(answer)
+```
+
 ## Coati7B examples
 
 ### Generation

diff --git a/applications/ColossalChat/coati/models/loss.py b/applications/ColossalChat/coati/models/loss.py
@@ -153,10 +153,11 @@ def forward(
         else:
             # If no reference model is provided
             ref_logratios = 0.0
+
         pi_logratios = logprob_actor_chosen.sum(-1) - logprob_actor_reject.sum(-1)
         logits = pi_logratios - ref_logratios - self.gamma / self.beta
         losses = -torch.nn.functional.logsigmoid(self.beta * logits)
-
+        loss = losses.mean()
         # Calculate rewards for logging
         if logprob_ref_chosen is not None:
             chosen_rewards = self.beta * (logprob_actor_chosen.sum(-1) - logprob_ref_chosen.sum(-1)).detach()
@@ -167,7 +168,7 @@ def forward(
         else:
             rejected_rewards = self.beta * logprob_actor_reject.sum(-1).detach()
 
-        return losses, chosen_rewards, rejected_rewards
+        return loss, chosen_rewards, rejected_rewards
 
 
 class LogSigLoss(nn.Module):

diff --git a/applications/ColossalChat/coati/models/utils.py b/applications/ColossalChat/coati/models/utils.py
@@ -50,8 +50,8 @@ def _log_probs_from_logits(logits: torch.Tensor, labels: torch.Tensor) -> torch.
         torch.Tensor: The log probabilities corresponding to the labels.
     """
     log_probs = F.log_softmax(logits, dim=-1)
-    log_probs_labels = log_probs.gather(dim=-1, index=labels.unsqueeze(-1))
-    return log_probs_labels.squeeze(-1)
+    per_label_logps = log_probs.gather(dim=-1, index=labels.unsqueeze(-1))
+    return per_label_logps.squeeze(-1)
 
 
 def calc_action_log_probs(logits: torch.Tensor, sequences: torch.LongTensor, num_actions: int) -> torch.Tensor:

diff --git a/applications/ColossalChat/coati/reasoner/guided_search/llm.py b/applications/ColossalChat/coati/reasoner/guided_search/llm.py
@@ -0,0 +1,26 @@
+from typing import List, Optional
+
+import openai
+from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
+
+API_KEY = "Dummy API Key"
+
+
+def get_client(base_url: Optional[str] = None) -> openai.Client:
+    """Build an OpenAI client authenticated with the module-level ``API_KEY``.
+
+    Args:
+        base_url: Endpoint forwarded unchanged to ``openai.Client``. ``None`` is
+            passed through as-is (presumably selecting the library's default
+            endpoint -- confirm against the openai client documentation).
+
+    Returns:
+        A newly constructed ``openai.Client``; no client is cached or reused.
+    """
+    # ``Optional[str]`` instead of ``str | None``: a PEP 604 union in a signature
+    # is evaluated at definition time and raises ``TypeError`` before Python 3.10.
+    return openai.Client(api_key=API_KEY, base_url=base_url)
+
+
+def chat_completion(
+    messages: List[ChatCompletionMessageParam],
+    model: str,
+    base_url: Optional[str] = None,
+    temperature: float = 0.8,
+    **kwargs,
+) -> ChatCompletion:
+    """Send one chat-completion request and return the response object.
+
+    A fresh client is created through ``get_client(base_url)`` on every call.
+
+    Args:
+        messages: Conversation messages forwarded as ``messages``.
+        model: Model identifier forwarded as ``model``.
+        base_url: Endpoint handed to ``get_client``; ``None`` is passed through.
+        temperature: Sampling temperature forwarded to the request (default 0.8).
+        **kwargs: Extra keyword arguments forwarded verbatim to
+            ``client.chat.completions.create``.
+
+    Returns:
+        The value returned by ``client.chat.completions.create``, unmodified.
+    """
+    # ``List[...]`` / ``Optional[...]`` keep the signature importable on
+    # interpreters older than 3.9 / 3.10, where ``list[...]`` and ``str | None``
+    # fail when the annotations are evaluated at definition time.
+    client = get_client(base_url)
+    response = client.chat.completions.create(
+        model=model,
+        messages=messages,
+        temperature=temperature,
+        **kwargs,
+    )
+    return response