-
Notifications
You must be signed in to change notification settings - Fork 1
/
training_logData.diff
41 lines (40 loc) · 1.68 KB
/
training_logData.diff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
--- original.py 2023-04-30 06:43:11.885991000 -0500
+++ modified.py 2023-04-30 06:49:19.268971428 -0500
@@ -411,8 +411,38 @@
if WANT_INTERRUPT:
yield "Interrupted before start."
return
+
+ def log_train_dataset(trainer):
+ from datetime import datetime
+ import os
+
+ # Get current date and time
+ now = datetime.now()
+ date_time = now.strftime("%Y%m%d_%H%M%S")
+
+ # Create a log filename with the current date and time
+ log_filename = f"train_dataset_log_{date_time}.txt"
+
+ # Ensure the logs directory exists
+ if not os.path.exists('logs'):
+ os.makedirs('logs')
+
+ # Create or open a log file in the logs directory
+ with open(os.path.join('logs', log_filename), 'a') as log_file:
+ # Iterate over the first 10 elements in the dataset
+ for i in range(min(10, len(trainer.train_dataset))):
+ # Decode the 'input_ids' from each element in the dataset
+ decoded_text = shared.tokenizer.decode(trainer.train_dataset[i]['input_ids'])
+ # Append the decoded text to the log file
+ log_file.write(decoded_text)
+ # Add separator line
+ log_file.write("\n---------------🦙TRAINER LOG DIVIDER😉---------------\n")
+
+ # Print log file creation confirmation
+ print(f"Log file '{log_filename}' created in the 'logs' directory. 🦙😉")
def threaded_run():
+ log_train_dataset(trainer)
trainer.train()
# Note: save in the thread in case the gradio thread breaks (eg browser closed)
lora_model.save_pretrained(lora_file_path)