PierrunoYT committed
Commit e5fe308 · verified · 1 Parent(s): 3727c37

Upload folder using huggingface_hub

Files changed (49)
  1. README.md +99 -0
  2. checkpoints/checkpoint-4000/config.json +24 -0
  3. checkpoints/checkpoint-4000/model.safetensors +3 -0
  4. checkpoints/checkpoint-4000/optimizer.pt +3 -0
  5. checkpoints/checkpoint-4000/rng_state.pth +3 -0
  6. checkpoints/checkpoint-4000/scaler.pt +3 -0
  7. checkpoints/checkpoint-4000/scheduler.pt +3 -0
  8. checkpoints/checkpoint-4000/special_tokens_map.json +7 -0
  9. checkpoints/checkpoint-4000/tokenizer.json +0 -0
  10. checkpoints/checkpoint-4000/tokenizer_config.json +56 -0
  11. checkpoints/checkpoint-4000/trainer_state.json +410 -0
  12. checkpoints/checkpoint-4000/training_args.bin +3 -0
  13. checkpoints/checkpoint-4000/vocab.txt +0 -0
  14. checkpoints/checkpoint-4500/config.json +24 -0
  15. checkpoints/checkpoint-4500/model.safetensors +3 -0
  16. checkpoints/checkpoint-4500/optimizer.pt +3 -0
  17. checkpoints/checkpoint-4500/rng_state.pth +3 -0
  18. checkpoints/checkpoint-4500/scaler.pt +3 -0
  19. checkpoints/checkpoint-4500/scheduler.pt +3 -0
  20. checkpoints/checkpoint-4500/special_tokens_map.json +7 -0
  21. checkpoints/checkpoint-4500/tokenizer.json +0 -0
  22. checkpoints/checkpoint-4500/tokenizer_config.json +56 -0
  23. checkpoints/checkpoint-4500/trainer_state.json +457 -0
  24. checkpoints/checkpoint-4500/training_args.bin +3 -0
  25. checkpoints/checkpoint-4500/vocab.txt +0 -0
  26. checkpoints/checkpoint-4689/config.json +24 -0
  27. checkpoints/checkpoint-4689/model.safetensors +3 -0
  28. checkpoints/checkpoint-4689/optimizer.pt +3 -0
  29. checkpoints/checkpoint-4689/rng_state.pth +3 -0
  30. checkpoints/checkpoint-4689/scaler.pt +3 -0
  31. checkpoints/checkpoint-4689/scheduler.pt +3 -0
  32. checkpoints/checkpoint-4689/special_tokens_map.json +7 -0
  33. checkpoints/checkpoint-4689/tokenizer.json +0 -0
  34. checkpoints/checkpoint-4689/tokenizer_config.json +56 -0
  35. checkpoints/checkpoint-4689/trainer_state.json +464 -0
  36. checkpoints/checkpoint-4689/training_args.bin +3 -0
  37. checkpoints/checkpoint-4689/vocab.txt +0 -0
  38. config.json +24 -0
  39. logs/events.out.tfevents.1763396606.Pierruno.82048.0 +3 -0
  40. logs/events.out.tfevents.1763397273.Pierruno.82048.1 +3 -0
  41. logs/events.out.tfevents.1763397463.Pierruno.78916.0 +3 -0
  42. logs/events.out.tfevents.1763398281.Pierruno.78916.1 +3 -0
  43. model.safetensors +3 -0
  44. special_tokens_map.json +7 -0
  45. test_results.json +11 -0
  46. tokenizer.json +0 -0
  47. tokenizer_config.json +56 -0
  48. training_args.json +21 -0
  49. vocab.txt +0 -0
README.md ADDED
@@ -0,0 +1,99 @@
+ ---
+ language: en
+ license: apache-2.0
+ tags:
+ - sentiment-analysis
+ - text-classification
+ - imdb
+ datasets:
+ - imdb
+ metrics:
+ - accuracy
+ - f1
+ model-index:
+ - name: sentiment-imdb-distilbert
+   results:
+   - task:
+       type: text-classification
+       name: Sentiment Analysis
+     dataset:
+       name: IMDB
+       type: imdb
+     metrics:
+     - type: accuracy
+       value: 0.9145
+       name: Accuracy
+     - type: f1
+       value: 0.9144
+       name: F1 Score
+ ---
+
+ # sentiment-imdb-distilbert
+
+ ## Model Description
+
+ Fine-tuned **distilbert-base-uncased** for binary sentiment classification on the IMDB movie review dataset.
+
+ **Training Date:** 2025-11-17 17:51:21
+
+ ## Intended Use
+
+ This model classifies text into positive or negative sentiment. It was trained on movie reviews but may generalize to other domains.
+
+ ## Performance
+
+ | Metric    | Score  |
+ |-----------|--------|
+ | Accuracy  | 0.9145 |
+ | F1 Score  | 0.9144 |
+ | Precision | 0.9154 |
+ | Recall    | 0.9135 |
+
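+ These figures match `test_results.json` in this commit. A minimal sketch for re-checking them on the IMDB test split; the batching loop and the use of the `datasets` and `evaluate` packages are illustrative assumptions, not the original evaluation code:
+
+ ```python
+ import torch
+ import evaluate
+ from datasets import load_dataset
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+ model_name = "PierrunoYT/sentiment-imdb-distilbert"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForSequenceClassification.from_pretrained(model_name).eval()
+
+ test = load_dataset("imdb", split="test")
+ accuracy, f1 = evaluate.load("accuracy"), evaluate.load("f1")
+
+ preds, refs = [], []
+ for i in range(0, len(test), 64):  # plain batched inference; tune the batch size to your hardware
+     batch = test[i : i + 64]
+     enc = tokenizer(batch["text"], return_tensors="pt", truncation=True,
+                     max_length=256, padding=True)
+     with torch.no_grad():
+         preds.extend(model(**enc).logits.argmax(dim=-1).tolist())
+     refs.extend(batch["label"])
+
+ print(accuracy.compute(predictions=preds, references=refs))
+ print(f1.compute(predictions=preds, references=refs))
+ ```
+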
+ ## Training Hyperparameters
+
+ - **Model:** distilbert-base-uncased
+ - **Epochs:** 3
+ - **Batch Size:** 16
+ - **Learning Rate:** 2e-05
+ - **Max Length:** 256
+ - **Warmup Ratio:** 0.1
+ - **Weight Decay:** 0.01
+ - **Training Samples:** 25000
+
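+ The training script itself is not part of this upload; below is a minimal sketch of how these settings could map onto the Hugging Face `Trainer` (the `tokenize` helper, the data collator, and the evaluation strategy are illustrative assumptions; only the hyperparameter values come from `training_args.json`):
+
+ ```python
+ from datasets import load_dataset
+ from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
+                           DataCollatorWithPadding, Trainer, TrainingArguments)
+
+ tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
+ model = AutoModelForSequenceClassification.from_pretrained(
+     "distilbert-base-uncased", num_labels=2)
+
+ def tokenize(batch):  # illustrative preprocessing, not the original code
+     return tokenizer(batch["text"], truncation=True, max_length=256)
+
+ dataset = load_dataset("imdb").map(tokenize, batched=True)
+
+ args = TrainingArguments(
+     output_dir="./sentiment_model_transformer",
+     num_train_epochs=3,
+     per_device_train_batch_size=16,
+     learning_rate=2e-5,
+     warmup_ratio=0.1,
+     weight_decay=0.01,
+     eval_strategy="steps",   # evaluate every eval_steps (recent transformers versions)
+     eval_steps=500,
+     save_steps=500,
+     logging_steps=100,
+     seed=42,
+ )
+
+ trainer = Trainer(
+     model=model,
+     args=args,
+     train_dataset=dataset["train"],
+     eval_dataset=dataset["test"],
+     data_collator=DataCollatorWithPadding(tokenizer),  # dynamic padding per batch
+ )
+ trainer.train()
+ ```
+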
+ ## Usage
+
+ ```python
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import torch
+
+ model_name = "PierrunoYT/sentiment-imdb-distilbert"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
+
+ def predict(text):
+     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=256)
+     with torch.no_grad():
+         outputs = model(**inputs)
+     # index 1 of the logits corresponds to the positive class
+     probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
+     return "Positive" if probs[0][1] > 0.5 else "Negative"
+
+ print(predict("This movie was amazing!"))  # Positive
+ ```
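+
+ The same checkpoint should also work through the higher-level `pipeline` API; a brief, untested sketch (the repository's `config.json` defines no `id2label` mapping, so the pipeline reports the default `LABEL_0`/`LABEL_1` names, with `LABEL_1` corresponding to "Positive" as in the snippet above):
+
+ ```python
+ from transformers import pipeline
+
+ classifier = pipeline("text-classification", model="PierrunoYT/sentiment-imdb-distilbert")
+
+ # truncation kwargs are forwarded to the tokenizer at call time
+ print(classifier("This movie was amazing!", truncation=True, max_length=256))
+ # e.g. [{'label': 'LABEL_1', 'score': ...}]
+ ```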
+
+ ## Limitations
+
+ - Trained primarily on movie reviews; performance may vary on other text types
+ - May reflect biases present in the IMDB dataset
+ - English language only
+
+ ## Citation
+
+ ```
+ @misc{sentiment-imdb-distilbert,
+   author = {Your Name},
+   title = {Sentiment Analysis with distilbert-base-uncased},
+   year = {2025},
+   publisher = {HuggingFace},
+   howpublished = {\url{https://huggingface.co/PierrunoYT/sentiment-imdb-distilbert}}
+ }
+ ```
checkpoints/checkpoint-4000/config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "activation": "gelu",
+   "architectures": [
+     "DistilBertForSequenceClassification"
+   ],
+   "attention_dropout": 0.1,
+   "dim": 768,
+   "dropout": 0.1,
+   "dtype": "float32",
+   "hidden_dim": 3072,
+   "initializer_range": 0.02,
+   "max_position_embeddings": 512,
+   "model_type": "distilbert",
+   "n_heads": 12,
+   "n_layers": 6,
+   "pad_token_id": 0,
+   "problem_type": "single_label_classification",
+   "qa_dropout": 0.1,
+   "seq_classif_dropout": 0.2,
+   "sinusoidal_pos_embds": false,
+   "tie_weights_": true,
+   "transformers_version": "4.57.1",
+   "vocab_size": 30522
+ }
checkpoints/checkpoint-4000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:99e251dab1ddbea9af67337e0c963c6b53c249a6ae9abd25431341b1e0a041d5
+ size 267832560
checkpoints/checkpoint-4000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6371669abd718e0c7910fe0f552eed2855482ee0a93dbf84a841749d52b2bca8
+ size 535727290
checkpoints/checkpoint-4000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3e246deab8c1ed84dc59618a67f01b8cf36fbd8dfc802cc848fbfc628663fff8
+ size 14244
checkpoints/checkpoint-4000/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8b34d1be2b8715bc6ed09db10ced5480b0e06e02f93931c3966bd87d4b75d535
+ size 988
checkpoints/checkpoint-4000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:87dbc1df1308a500ad62871152b56c5c819fc0923bf7b626fb90ade7e5d963a8
+ size 1064
checkpoints/checkpoint-4000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
checkpoints/checkpoint-4000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-4000/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "extra_special_tokens": {},
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "DistilBertTokenizer",
+   "unk_token": "[UNK]"
+ }
checkpoints/checkpoint-4000/trainer_state.json ADDED
@@ -0,0 +1,410 @@
1
+ {
2
+ "best_global_step": 2500,
3
+ "best_metric": 0.9126647834274952,
4
+ "best_model_checkpoint": "./sentiment_model_transformer/checkpoints\\checkpoint-2500",
5
+ "epoch": 2.5591810620601407,
6
+ "eval_steps": 500,
7
+ "global_step": 4000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.06397952655150352,
14
+ "grad_norm": 1.2034984827041626,
15
+ "learning_rate": 4.221748400852878e-06,
16
+ "loss": 0.693,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.12795905310300704,
21
+ "grad_norm": 6.451626777648926,
22
+ "learning_rate": 8.443496801705757e-06,
23
+ "loss": 0.582,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.19193857965451055,
28
+ "grad_norm": 6.7827935218811035,
29
+ "learning_rate": 1.2665245202558636e-05,
30
+ "loss": 0.3471,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.2559181062060141,
35
+ "grad_norm": 8.43331527709961,
36
+ "learning_rate": 1.6929637526652455e-05,
37
+ "loss": 0.3145,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.3198976327575176,
42
+ "grad_norm": 3.5554704666137695,
43
+ "learning_rate": 1.9867298578199055e-05,
44
+ "loss": 0.3029,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.3198976327575176,
49
+ "eval_accuracy": 0.88444,
50
+ "eval_f1": 0.8864966801555809,
51
+ "eval_loss": 0.2814505994319916,
52
+ "eval_precision": 0.8709951362618699,
53
+ "eval_recall": 0.90256,
54
+ "eval_runtime": 50.1894,
55
+ "eval_samples_per_second": 498.113,
56
+ "eval_steps_per_second": 31.142,
57
+ "step": 500
58
+ },
59
+ {
60
+ "epoch": 0.3838771593090211,
61
+ "grad_norm": 9.981550216674805,
62
+ "learning_rate": 1.9393364928909955e-05,
63
+ "loss": 0.3061,
64
+ "step": 600
65
+ },
66
+ {
67
+ "epoch": 0.44785668586052463,
68
+ "grad_norm": 12.963433265686035,
69
+ "learning_rate": 1.8919431279620855e-05,
70
+ "loss": 0.299,
71
+ "step": 700
72
+ },
73
+ {
74
+ "epoch": 0.5118362124120281,
75
+ "grad_norm": 15.598821640014648,
76
+ "learning_rate": 1.8445497630331755e-05,
77
+ "loss": 0.3067,
78
+ "step": 800
79
+ },
80
+ {
81
+ "epoch": 0.5758157389635317,
82
+ "grad_norm": 13.730338096618652,
83
+ "learning_rate": 1.7971563981042655e-05,
84
+ "loss": 0.2656,
85
+ "step": 900
86
+ },
87
+ {
88
+ "epoch": 0.6397952655150352,
89
+ "grad_norm": 6.761341094970703,
90
+ "learning_rate": 1.7497630331753558e-05,
91
+ "loss": 0.2907,
92
+ "step": 1000
93
+ },
94
+ {
95
+ "epoch": 0.6397952655150352,
96
+ "eval_accuracy": 0.89796,
97
+ "eval_f1": 0.90142586653271,
98
+ "eval_loss": 0.24962513148784637,
99
+ "eval_precision": 0.8718140369235369,
100
+ "eval_recall": 0.93312,
101
+ "eval_runtime": 59.8142,
102
+ "eval_samples_per_second": 417.961,
103
+ "eval_steps_per_second": 26.131,
104
+ "step": 1000
105
+ },
106
+ {
107
+ "epoch": 0.7037747920665387,
108
+ "grad_norm": 3.8816897869110107,
109
+ "learning_rate": 1.7023696682464458e-05,
110
+ "loss": 0.276,
111
+ "step": 1100
112
+ },
113
+ {
114
+ "epoch": 0.7677543186180422,
115
+ "grad_norm": 8.760039329528809,
116
+ "learning_rate": 1.6549763033175357e-05,
117
+ "loss": 0.2589,
118
+ "step": 1200
119
+ },
120
+ {
121
+ "epoch": 0.8317338451695457,
122
+ "grad_norm": 1.0741268396377563,
123
+ "learning_rate": 1.6075829383886257e-05,
124
+ "loss": 0.2585,
125
+ "step": 1300
126
+ },
127
+ {
128
+ "epoch": 0.8957133717210493,
129
+ "grad_norm": 5.111315727233887,
130
+ "learning_rate": 1.5601895734597157e-05,
131
+ "loss": 0.2564,
132
+ "step": 1400
133
+ },
134
+ {
135
+ "epoch": 0.9596928982725528,
136
+ "grad_norm": 12.378557205200195,
137
+ "learning_rate": 1.5127962085308059e-05,
138
+ "loss": 0.304,
139
+ "step": 1500
140
+ },
141
+ {
142
+ "epoch": 0.9596928982725528,
143
+ "eval_accuracy": 0.90884,
144
+ "eval_f1": 0.9085950346929772,
145
+ "eval_loss": 0.22750799357891083,
146
+ "eval_precision": 0.9110431915064747,
147
+ "eval_recall": 0.90616,
148
+ "eval_runtime": 47.4099,
149
+ "eval_samples_per_second": 527.317,
150
+ "eval_steps_per_second": 32.968,
151
+ "step": 1500
152
+ },
153
+ {
154
+ "epoch": 1.0236724248240563,
155
+ "grad_norm": 13.478399276733398,
156
+ "learning_rate": 1.4654028436018958e-05,
157
+ "loss": 0.19,
158
+ "step": 1600
159
+ },
160
+ {
161
+ "epoch": 1.0876519513755598,
162
+ "grad_norm": 5.589203357696533,
163
+ "learning_rate": 1.4180094786729858e-05,
164
+ "loss": 0.1837,
165
+ "step": 1700
166
+ },
167
+ {
168
+ "epoch": 1.1516314779270633,
169
+ "grad_norm": 5.220266819000244,
170
+ "learning_rate": 1.370616113744076e-05,
171
+ "loss": 0.2202,
172
+ "step": 1800
173
+ },
174
+ {
175
+ "epoch": 1.2156110044785668,
176
+ "grad_norm": 15.924393653869629,
177
+ "learning_rate": 1.323222748815166e-05,
178
+ "loss": 0.1922,
179
+ "step": 1900
180
+ },
181
+ {
182
+ "epoch": 1.2795905310300704,
183
+ "grad_norm": 3.0759739875793457,
184
+ "learning_rate": 1.2758293838862561e-05,
185
+ "loss": 0.1757,
186
+ "step": 2000
187
+ },
188
+ {
189
+ "epoch": 1.2795905310300704,
190
+ "eval_accuracy": 0.90656,
191
+ "eval_f1": 0.9086071987480439,
192
+ "eval_loss": 0.2567562162876129,
193
+ "eval_precision": 0.8891271056661562,
194
+ "eval_recall": 0.92896,
195
+ "eval_runtime": 58.5167,
196
+ "eval_samples_per_second": 427.228,
197
+ "eval_steps_per_second": 26.71,
198
+ "step": 2000
199
+ },
200
+ {
201
+ "epoch": 1.3435700575815739,
202
+ "grad_norm": 4.443939208984375,
203
+ "learning_rate": 1.228436018957346e-05,
204
+ "loss": 0.1916,
205
+ "step": 2100
206
+ },
207
+ {
208
+ "epoch": 1.4075495841330774,
209
+ "grad_norm": 11.637112617492676,
210
+ "learning_rate": 1.181042654028436e-05,
211
+ "loss": 0.1829,
212
+ "step": 2200
213
+ },
214
+ {
215
+ "epoch": 1.471529110684581,
216
+ "grad_norm": 5.834052562713623,
217
+ "learning_rate": 1.133649289099526e-05,
218
+ "loss": 0.1833,
219
+ "step": 2300
220
+ },
221
+ {
222
+ "epoch": 1.5355086372360844,
223
+ "grad_norm": 5.50422477722168,
224
+ "learning_rate": 1.086255924170616e-05,
225
+ "loss": 0.1702,
226
+ "step": 2400
227
+ },
228
+ {
229
+ "epoch": 1.599488163787588,
230
+ "grad_norm": 14.709617614746094,
231
+ "learning_rate": 1.0388625592417063e-05,
232
+ "loss": 0.189,
233
+ "step": 2500
234
+ },
235
+ {
236
+ "epoch": 1.599488163787588,
237
+ "eval_accuracy": 0.91096,
238
+ "eval_f1": 0.9126647834274952,
239
+ "eval_loss": 0.2622799873352051,
240
+ "eval_precision": 0.8955189405605174,
241
+ "eval_recall": 0.93048,
242
+ "eval_runtime": 57.0885,
243
+ "eval_samples_per_second": 437.917,
244
+ "eval_steps_per_second": 27.379,
245
+ "step": 2500
246
+ },
247
+ {
248
+ "epoch": 1.6634676903390915,
249
+ "grad_norm": 1.3613829612731934,
250
+ "learning_rate": 9.914691943127963e-06,
251
+ "loss": 0.2136,
252
+ "step": 2600
253
+ },
254
+ {
255
+ "epoch": 1.727447216890595,
256
+ "grad_norm": 5.129843235015869,
257
+ "learning_rate": 9.445497630331755e-06,
258
+ "loss": 0.1955,
259
+ "step": 2700
260
+ },
261
+ {
262
+ "epoch": 1.7914267434420985,
263
+ "grad_norm": 11.221867561340332,
264
+ "learning_rate": 8.971563981042654e-06,
265
+ "loss": 0.1888,
266
+ "step": 2800
267
+ },
268
+ {
269
+ "epoch": 1.855406269993602,
270
+ "grad_norm": 16.80147361755371,
271
+ "learning_rate": 8.497630331753554e-06,
272
+ "loss": 0.1715,
273
+ "step": 2900
274
+ },
275
+ {
276
+ "epoch": 1.9193857965451055,
277
+ "grad_norm": 1.4080605506896973,
278
+ "learning_rate": 8.023696682464456e-06,
279
+ "loss": 0.159,
280
+ "step": 3000
281
+ },
282
+ {
283
+ "epoch": 1.9193857965451055,
284
+ "eval_accuracy": 0.91048,
285
+ "eval_f1": 0.9090982940698619,
286
+ "eval_loss": 0.2924627661705017,
287
+ "eval_precision": 0.9233498349834983,
288
+ "eval_recall": 0.89528,
289
+ "eval_runtime": 57.9115,
290
+ "eval_samples_per_second": 431.693,
291
+ "eval_steps_per_second": 26.989,
292
+ "step": 3000
293
+ },
294
+ {
295
+ "epoch": 1.983365323096609,
296
+ "grad_norm": 10.077709197998047,
297
+ "learning_rate": 7.554502369668247e-06,
298
+ "loss": 0.1986,
299
+ "step": 3100
300
+ },
301
+ {
302
+ "epoch": 2.0473448496481126,
303
+ "grad_norm": 0.11581742018461227,
304
+ "learning_rate": 7.080568720379148e-06,
305
+ "loss": 0.1298,
306
+ "step": 3200
307
+ },
308
+ {
309
+ "epoch": 2.111324376199616,
310
+ "grad_norm": 0.21151360869407654,
311
+ "learning_rate": 6.606635071090048e-06,
312
+ "loss": 0.08,
313
+ "step": 3300
314
+ },
315
+ {
316
+ "epoch": 2.1753039027511196,
317
+ "grad_norm": 0.4802148938179016,
318
+ "learning_rate": 6.132701421800948e-06,
319
+ "loss": 0.1072,
320
+ "step": 3400
321
+ },
322
+ {
323
+ "epoch": 2.239283429302623,
324
+ "grad_norm": 21.213685989379883,
325
+ "learning_rate": 5.658767772511849e-06,
326
+ "loss": 0.1465,
327
+ "step": 3500
328
+ },
329
+ {
330
+ "epoch": 2.239283429302623,
331
+ "eval_accuracy": 0.91056,
332
+ "eval_f1": 0.9118226989510214,
333
+ "eval_loss": 0.3255835473537445,
334
+ "eval_precision": 0.8991289469590916,
335
+ "eval_recall": 0.92488,
336
+ "eval_runtime": 58.5259,
337
+ "eval_samples_per_second": 427.161,
338
+ "eval_steps_per_second": 26.706,
339
+ "step": 3500
340
+ },
341
+ {
342
+ "epoch": 2.3032629558541267,
343
+ "grad_norm": 2.2766849994659424,
344
+ "learning_rate": 5.1848341232227494e-06,
345
+ "loss": 0.1111,
346
+ "step": 3600
347
+ },
348
+ {
349
+ "epoch": 2.36724248240563,
350
+ "grad_norm": 2.196903944015503,
351
+ "learning_rate": 4.710900473933649e-06,
352
+ "loss": 0.0955,
353
+ "step": 3700
354
+ },
355
+ {
356
+ "epoch": 2.4312220089571337,
357
+ "grad_norm": 24.438621520996094,
358
+ "learning_rate": 4.23696682464455e-06,
359
+ "loss": 0.1145,
360
+ "step": 3800
361
+ },
362
+ {
363
+ "epoch": 2.495201535508637,
364
+ "grad_norm": 0.4384997487068176,
365
+ "learning_rate": 3.7630331753554506e-06,
366
+ "loss": 0.1342,
367
+ "step": 3900
368
+ },
369
+ {
370
+ "epoch": 2.5591810620601407,
371
+ "grad_norm": 7.266367435455322,
372
+ "learning_rate": 3.2890995260663512e-06,
373
+ "loss": 0.1028,
374
+ "step": 4000
375
+ },
376
+ {
377
+ "epoch": 2.5591810620601407,
378
+ "eval_accuracy": 0.91268,
379
+ "eval_f1": 0.9119225337905992,
380
+ "eval_loss": 0.31132665276527405,
381
+ "eval_precision": 0.9199023199023199,
382
+ "eval_recall": 0.90408,
383
+ "eval_runtime": 57.7956,
384
+ "eval_samples_per_second": 432.559,
385
+ "eval_steps_per_second": 27.044,
386
+ "step": 4000
387
+ }
388
+ ],
389
+ "logging_steps": 100,
390
+ "max_steps": 4689,
391
+ "num_input_tokens_seen": 0,
392
+ "num_train_epochs": 3,
393
+ "save_steps": 500,
394
+ "stateful_callbacks": {
395
+ "TrainerControl": {
396
+ "args": {
397
+ "should_epoch_stop": false,
398
+ "should_evaluate": false,
399
+ "should_log": false,
400
+ "should_save": true,
401
+ "should_training_stop": false
402
+ },
403
+ "attributes": {}
404
+ }
405
+ },
406
+ "total_flos": 4237897017802752.0,
407
+ "train_batch_size": 16,
408
+ "trial_name": null,
409
+ "trial_params": null
410
+ }
checkpoints/checkpoint-4000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c927d2cb16dc987623e74b58eaecf59b19812c28268af85b7198d40b318a1776
+ size 5432
checkpoints/checkpoint-4000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-4500/config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "activation": "gelu",
+   "architectures": [
+     "DistilBertForSequenceClassification"
+   ],
+   "attention_dropout": 0.1,
+   "dim": 768,
+   "dropout": 0.1,
+   "dtype": "float32",
+   "hidden_dim": 3072,
+   "initializer_range": 0.02,
+   "max_position_embeddings": 512,
+   "model_type": "distilbert",
+   "n_heads": 12,
+   "n_layers": 6,
+   "pad_token_id": 0,
+   "problem_type": "single_label_classification",
+   "qa_dropout": 0.1,
+   "seq_classif_dropout": 0.2,
+   "sinusoidal_pos_embds": false,
+   "tie_weights_": true,
+   "transformers_version": "4.57.1",
+   "vocab_size": 30522
+ }
checkpoints/checkpoint-4500/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b04b3e3b130814f7e7803af2368c5f5fbe982ce36c08b473f16acc665256bfa
+ size 267832560
checkpoints/checkpoint-4500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d206a06b2435968ff7deeeb8de97b9b35fee8cb3976faf09899fef7b2e4e0886
+ size 535727290
checkpoints/checkpoint-4500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bbe58c49a4bfe7bca5f1b1910fe18fa34ba03d8335e01d3db3f999402b469410
+ size 14244
checkpoints/checkpoint-4500/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:46b6ab01cce18518b602553428d595df8bcf4506c096e9747cbb5f3f4ac94555
+ size 988
checkpoints/checkpoint-4500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a95a37fa91fa7c250d7742cdb09f56215352c9e81b719c2cf9281283b0657bae
+ size 1064
checkpoints/checkpoint-4500/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
checkpoints/checkpoint-4500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-4500/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "extra_special_tokens": {},
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "DistilBertTokenizer",
+   "unk_token": "[UNK]"
+ }
checkpoints/checkpoint-4500/trainer_state.json ADDED
@@ -0,0 +1,457 @@
1
+ {
2
+ "best_global_step": 4500,
3
+ "best_metric": 0.9144344344344344,
4
+ "best_model_checkpoint": "./sentiment_model_transformer/checkpoints\\checkpoint-4500",
5
+ "epoch": 2.8790786948176583,
6
+ "eval_steps": 500,
7
+ "global_step": 4500,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.06397952655150352,
14
+ "grad_norm": 1.2034984827041626,
15
+ "learning_rate": 4.221748400852878e-06,
16
+ "loss": 0.693,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.12795905310300704,
21
+ "grad_norm": 6.451626777648926,
22
+ "learning_rate": 8.443496801705757e-06,
23
+ "loss": 0.582,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.19193857965451055,
28
+ "grad_norm": 6.7827935218811035,
29
+ "learning_rate": 1.2665245202558636e-05,
30
+ "loss": 0.3471,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.2559181062060141,
35
+ "grad_norm": 8.43331527709961,
36
+ "learning_rate": 1.6929637526652455e-05,
37
+ "loss": 0.3145,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.3198976327575176,
42
+ "grad_norm": 3.5554704666137695,
43
+ "learning_rate": 1.9867298578199055e-05,
44
+ "loss": 0.3029,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.3198976327575176,
49
+ "eval_accuracy": 0.88444,
50
+ "eval_f1": 0.8864966801555809,
51
+ "eval_loss": 0.2814505994319916,
52
+ "eval_precision": 0.8709951362618699,
53
+ "eval_recall": 0.90256,
54
+ "eval_runtime": 50.1894,
55
+ "eval_samples_per_second": 498.113,
56
+ "eval_steps_per_second": 31.142,
57
+ "step": 500
58
+ },
59
+ {
60
+ "epoch": 0.3838771593090211,
61
+ "grad_norm": 9.981550216674805,
62
+ "learning_rate": 1.9393364928909955e-05,
63
+ "loss": 0.3061,
64
+ "step": 600
65
+ },
66
+ {
67
+ "epoch": 0.44785668586052463,
68
+ "grad_norm": 12.963433265686035,
69
+ "learning_rate": 1.8919431279620855e-05,
70
+ "loss": 0.299,
71
+ "step": 700
72
+ },
73
+ {
74
+ "epoch": 0.5118362124120281,
75
+ "grad_norm": 15.598821640014648,
76
+ "learning_rate": 1.8445497630331755e-05,
77
+ "loss": 0.3067,
78
+ "step": 800
79
+ },
80
+ {
81
+ "epoch": 0.5758157389635317,
82
+ "grad_norm": 13.730338096618652,
83
+ "learning_rate": 1.7971563981042655e-05,
84
+ "loss": 0.2656,
85
+ "step": 900
86
+ },
87
+ {
88
+ "epoch": 0.6397952655150352,
89
+ "grad_norm": 6.761341094970703,
90
+ "learning_rate": 1.7497630331753558e-05,
91
+ "loss": 0.2907,
92
+ "step": 1000
93
+ },
94
+ {
95
+ "epoch": 0.6397952655150352,
96
+ "eval_accuracy": 0.89796,
97
+ "eval_f1": 0.90142586653271,
98
+ "eval_loss": 0.24962513148784637,
99
+ "eval_precision": 0.8718140369235369,
100
+ "eval_recall": 0.93312,
101
+ "eval_runtime": 59.8142,
102
+ "eval_samples_per_second": 417.961,
103
+ "eval_steps_per_second": 26.131,
104
+ "step": 1000
105
+ },
106
+ {
107
+ "epoch": 0.7037747920665387,
108
+ "grad_norm": 3.8816897869110107,
109
+ "learning_rate": 1.7023696682464458e-05,
110
+ "loss": 0.276,
111
+ "step": 1100
112
+ },
113
+ {
114
+ "epoch": 0.7677543186180422,
115
+ "grad_norm": 8.760039329528809,
116
+ "learning_rate": 1.6549763033175357e-05,
117
+ "loss": 0.2589,
118
+ "step": 1200
119
+ },
120
+ {
121
+ "epoch": 0.8317338451695457,
122
+ "grad_norm": 1.0741268396377563,
123
+ "learning_rate": 1.6075829383886257e-05,
124
+ "loss": 0.2585,
125
+ "step": 1300
126
+ },
127
+ {
128
+ "epoch": 0.8957133717210493,
129
+ "grad_norm": 5.111315727233887,
130
+ "learning_rate": 1.5601895734597157e-05,
131
+ "loss": 0.2564,
132
+ "step": 1400
133
+ },
134
+ {
135
+ "epoch": 0.9596928982725528,
136
+ "grad_norm": 12.378557205200195,
137
+ "learning_rate": 1.5127962085308059e-05,
138
+ "loss": 0.304,
139
+ "step": 1500
140
+ },
141
+ {
142
+ "epoch": 0.9596928982725528,
143
+ "eval_accuracy": 0.90884,
144
+ "eval_f1": 0.9085950346929772,
145
+ "eval_loss": 0.22750799357891083,
146
+ "eval_precision": 0.9110431915064747,
147
+ "eval_recall": 0.90616,
148
+ "eval_runtime": 47.4099,
149
+ "eval_samples_per_second": 527.317,
150
+ "eval_steps_per_second": 32.968,
151
+ "step": 1500
152
+ },
153
+ {
154
+ "epoch": 1.0236724248240563,
155
+ "grad_norm": 13.478399276733398,
156
+ "learning_rate": 1.4654028436018958e-05,
157
+ "loss": 0.19,
158
+ "step": 1600
159
+ },
160
+ {
161
+ "epoch": 1.0876519513755598,
162
+ "grad_norm": 5.589203357696533,
163
+ "learning_rate": 1.4180094786729858e-05,
164
+ "loss": 0.1837,
165
+ "step": 1700
166
+ },
167
+ {
168
+ "epoch": 1.1516314779270633,
169
+ "grad_norm": 5.220266819000244,
170
+ "learning_rate": 1.370616113744076e-05,
171
+ "loss": 0.2202,
172
+ "step": 1800
173
+ },
174
+ {
175
+ "epoch": 1.2156110044785668,
176
+ "grad_norm": 15.924393653869629,
177
+ "learning_rate": 1.323222748815166e-05,
178
+ "loss": 0.1922,
179
+ "step": 1900
180
+ },
181
+ {
182
+ "epoch": 1.2795905310300704,
183
+ "grad_norm": 3.0759739875793457,
184
+ "learning_rate": 1.2758293838862561e-05,
185
+ "loss": 0.1757,
186
+ "step": 2000
187
+ },
188
+ {
189
+ "epoch": 1.2795905310300704,
190
+ "eval_accuracy": 0.90656,
191
+ "eval_f1": 0.9086071987480439,
192
+ "eval_loss": 0.2567562162876129,
193
+ "eval_precision": 0.8891271056661562,
194
+ "eval_recall": 0.92896,
195
+ "eval_runtime": 58.5167,
196
+ "eval_samples_per_second": 427.228,
197
+ "eval_steps_per_second": 26.71,
198
+ "step": 2000
199
+ },
200
+ {
201
+ "epoch": 1.3435700575815739,
202
+ "grad_norm": 4.443939208984375,
203
+ "learning_rate": 1.228436018957346e-05,
204
+ "loss": 0.1916,
205
+ "step": 2100
206
+ },
207
+ {
208
+ "epoch": 1.4075495841330774,
209
+ "grad_norm": 11.637112617492676,
210
+ "learning_rate": 1.181042654028436e-05,
211
+ "loss": 0.1829,
212
+ "step": 2200
213
+ },
214
+ {
215
+ "epoch": 1.471529110684581,
216
+ "grad_norm": 5.834052562713623,
217
+ "learning_rate": 1.133649289099526e-05,
218
+ "loss": 0.1833,
219
+ "step": 2300
220
+ },
221
+ {
222
+ "epoch": 1.5355086372360844,
223
+ "grad_norm": 5.50422477722168,
224
+ "learning_rate": 1.086255924170616e-05,
225
+ "loss": 0.1702,
226
+ "step": 2400
227
+ },
228
+ {
229
+ "epoch": 1.599488163787588,
230
+ "grad_norm": 14.709617614746094,
231
+ "learning_rate": 1.0388625592417063e-05,
232
+ "loss": 0.189,
233
+ "step": 2500
234
+ },
235
+ {
236
+ "epoch": 1.599488163787588,
237
+ "eval_accuracy": 0.91096,
238
+ "eval_f1": 0.9126647834274952,
239
+ "eval_loss": 0.2622799873352051,
240
+ "eval_precision": 0.8955189405605174,
241
+ "eval_recall": 0.93048,
242
+ "eval_runtime": 57.0885,
243
+ "eval_samples_per_second": 437.917,
244
+ "eval_steps_per_second": 27.379,
245
+ "step": 2500
246
+ },
247
+ {
248
+ "epoch": 1.6634676903390915,
249
+ "grad_norm": 1.3613829612731934,
250
+ "learning_rate": 9.914691943127963e-06,
251
+ "loss": 0.2136,
252
+ "step": 2600
253
+ },
254
+ {
255
+ "epoch": 1.727447216890595,
256
+ "grad_norm": 5.129843235015869,
257
+ "learning_rate": 9.445497630331755e-06,
258
+ "loss": 0.1955,
259
+ "step": 2700
260
+ },
261
+ {
262
+ "epoch": 1.7914267434420985,
263
+ "grad_norm": 11.221867561340332,
264
+ "learning_rate": 8.971563981042654e-06,
265
+ "loss": 0.1888,
266
+ "step": 2800
267
+ },
268
+ {
269
+ "epoch": 1.855406269993602,
270
+ "grad_norm": 16.80147361755371,
271
+ "learning_rate": 8.497630331753554e-06,
272
+ "loss": 0.1715,
273
+ "step": 2900
274
+ },
275
+ {
276
+ "epoch": 1.9193857965451055,
277
+ "grad_norm": 1.4080605506896973,
278
+ "learning_rate": 8.023696682464456e-06,
279
+ "loss": 0.159,
280
+ "step": 3000
281
+ },
282
+ {
283
+ "epoch": 1.9193857965451055,
284
+ "eval_accuracy": 0.91048,
285
+ "eval_f1": 0.9090982940698619,
286
+ "eval_loss": 0.2924627661705017,
287
+ "eval_precision": 0.9233498349834983,
288
+ "eval_recall": 0.89528,
289
+ "eval_runtime": 57.9115,
290
+ "eval_samples_per_second": 431.693,
291
+ "eval_steps_per_second": 26.989,
292
+ "step": 3000
293
+ },
294
+ {
295
+ "epoch": 1.983365323096609,
296
+ "grad_norm": 10.077709197998047,
297
+ "learning_rate": 7.554502369668247e-06,
298
+ "loss": 0.1986,
299
+ "step": 3100
300
+ },
301
+ {
302
+ "epoch": 2.0473448496481126,
303
+ "grad_norm": 0.11581742018461227,
304
+ "learning_rate": 7.080568720379148e-06,
305
+ "loss": 0.1298,
306
+ "step": 3200
307
+ },
308
+ {
309
+ "epoch": 2.111324376199616,
310
+ "grad_norm": 0.21151360869407654,
311
+ "learning_rate": 6.606635071090048e-06,
312
+ "loss": 0.08,
313
+ "step": 3300
314
+ },
315
+ {
316
+ "epoch": 2.1753039027511196,
317
+ "grad_norm": 0.4802148938179016,
318
+ "learning_rate": 6.132701421800948e-06,
319
+ "loss": 0.1072,
320
+ "step": 3400
321
+ },
322
+ {
323
+ "epoch": 2.239283429302623,
324
+ "grad_norm": 21.213685989379883,
325
+ "learning_rate": 5.658767772511849e-06,
326
+ "loss": 0.1465,
327
+ "step": 3500
328
+ },
329
+ {
330
+ "epoch": 2.239283429302623,
331
+ "eval_accuracy": 0.91056,
332
+ "eval_f1": 0.9118226989510214,
333
+ "eval_loss": 0.3255835473537445,
334
+ "eval_precision": 0.8991289469590916,
335
+ "eval_recall": 0.92488,
336
+ "eval_runtime": 58.5259,
337
+ "eval_samples_per_second": 427.161,
338
+ "eval_steps_per_second": 26.706,
339
+ "step": 3500
340
+ },
341
+ {
342
+ "epoch": 2.3032629558541267,
343
+ "grad_norm": 2.2766849994659424,
344
+ "learning_rate": 5.1848341232227494e-06,
345
+ "loss": 0.1111,
346
+ "step": 3600
347
+ },
348
+ {
349
+ "epoch": 2.36724248240563,
350
+ "grad_norm": 2.196903944015503,
351
+ "learning_rate": 4.710900473933649e-06,
352
+ "loss": 0.0955,
353
+ "step": 3700
354
+ },
355
+ {
356
+ "epoch": 2.4312220089571337,
357
+ "grad_norm": 24.438621520996094,
358
+ "learning_rate": 4.23696682464455e-06,
359
+ "loss": 0.1145,
360
+ "step": 3800
361
+ },
362
+ {
363
+ "epoch": 2.495201535508637,
364
+ "grad_norm": 0.4384997487068176,
365
+ "learning_rate": 3.7630331753554506e-06,
366
+ "loss": 0.1342,
367
+ "step": 3900
368
+ },
369
+ {
370
+ "epoch": 2.5591810620601407,
371
+ "grad_norm": 7.266367435455322,
372
+ "learning_rate": 3.2890995260663512e-06,
373
+ "loss": 0.1028,
374
+ "step": 4000
375
+ },
376
+ {
377
+ "epoch": 2.5591810620601407,
378
+ "eval_accuracy": 0.91268,
379
+ "eval_f1": 0.9119225337905992,
380
+ "eval_loss": 0.31132665276527405,
381
+ "eval_precision": 0.9199023199023199,
382
+ "eval_recall": 0.90408,
383
+ "eval_runtime": 57.7956,
384
+ "eval_samples_per_second": 432.559,
385
+ "eval_steps_per_second": 27.044,
386
+ "step": 4000
387
+ },
388
+ {
389
+ "epoch": 2.6231605886116443,
390
+ "grad_norm": 0.180605947971344,
391
+ "learning_rate": 2.8151658767772515e-06,
392
+ "loss": 0.1003,
393
+ "step": 4100
394
+ },
395
+ {
396
+ "epoch": 2.6871401151631478,
397
+ "grad_norm": 7.690706729888916,
398
+ "learning_rate": 2.3412322274881517e-06,
399
+ "loss": 0.1095,
400
+ "step": 4200
401
+ },
402
+ {
403
+ "epoch": 2.7511196417146513,
404
+ "grad_norm": 5.688648700714111,
405
+ "learning_rate": 1.8672985781990523e-06,
406
+ "loss": 0.0973,
407
+ "step": 4300
408
+ },
409
+ {
410
+ "epoch": 2.815099168266155,
411
+ "grad_norm": 0.07725273072719574,
412
+ "learning_rate": 1.3933649289099526e-06,
413
+ "loss": 0.0937,
414
+ "step": 4400
415
+ },
416
+ {
417
+ "epoch": 2.8790786948176583,
418
+ "grad_norm": 0.1056542843580246,
419
+ "learning_rate": 9.194312796208532e-07,
420
+ "loss": 0.1014,
421
+ "step": 4500
422
+ },
423
+ {
424
+ "epoch": 2.8790786948176583,
425
+ "eval_accuracy": 0.91452,
426
+ "eval_f1": 0.9144344344344344,
427
+ "eval_loss": 0.3255852460861206,
428
+ "eval_precision": 0.9153507014028056,
429
+ "eval_recall": 0.91352,
430
+ "eval_runtime": 58.1033,
431
+ "eval_samples_per_second": 430.268,
432
+ "eval_steps_per_second": 26.9,
433
+ "step": 4500
434
+ }
435
+ ],
436
+ "logging_steps": 100,
437
+ "max_steps": 4689,
438
+ "num_input_tokens_seen": 0,
439
+ "num_train_epochs": 3,
440
+ "save_steps": 500,
441
+ "stateful_callbacks": {
442
+ "TrainerControl": {
443
+ "args": {
444
+ "should_epoch_stop": false,
445
+ "should_evaluate": false,
446
+ "should_log": false,
447
+ "should_save": true,
448
+ "should_training_stop": false
449
+ },
450
+ "attributes": {}
451
+ }
452
+ },
453
+ "total_flos": 4767766612426752.0,
454
+ "train_batch_size": 16,
455
+ "trial_name": null,
456
+ "trial_params": null
457
+ }
checkpoints/checkpoint-4500/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c927d2cb16dc987623e74b58eaecf59b19812c28268af85b7198d40b318a1776
+ size 5432
checkpoints/checkpoint-4500/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-4689/config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "activation": "gelu",
+   "architectures": [
+     "DistilBertForSequenceClassification"
+   ],
+   "attention_dropout": 0.1,
+   "dim": 768,
+   "dropout": 0.1,
+   "dtype": "float32",
+   "hidden_dim": 3072,
+   "initializer_range": 0.02,
+   "max_position_embeddings": 512,
+   "model_type": "distilbert",
+   "n_heads": 12,
+   "n_layers": 6,
+   "pad_token_id": 0,
+   "problem_type": "single_label_classification",
+   "qa_dropout": 0.1,
+   "seq_classif_dropout": 0.2,
+   "sinusoidal_pos_embds": false,
+   "tie_weights_": true,
+   "transformers_version": "4.57.1",
+   "vocab_size": 30522
+ }
checkpoints/checkpoint-4689/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8037024490e99573351988c7e3190c515258900b4a6139f06fbb3e56efd0d0bd
+ size 267832560
checkpoints/checkpoint-4689/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f7e77c8d70bae546733649471f8421d53b57e7d16fb152e4abda9b38994d49c6
+ size 535727290
checkpoints/checkpoint-4689/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:557a4b4db0462bf7b7170907d83ef303dc09f0beb194b0b0f593f2532b7aba0b
+ size 14244
checkpoints/checkpoint-4689/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f64d996bc4957a751abff75b472ce689bdfc2567051044a4c608cb33d0f884ef
+ size 988
checkpoints/checkpoint-4689/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:10465a3fbe5f978b6bf61ae32e12643c645ff7251d4206cc44da4a0cbe1826b3
+ size 1064
checkpoints/checkpoint-4689/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
checkpoints/checkpoint-4689/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/checkpoint-4689/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "extra_special_tokens": {},
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "DistilBertTokenizer",
+   "unk_token": "[UNK]"
+ }
checkpoints/checkpoint-4689/trainer_state.json ADDED
@@ -0,0 +1,464 @@
1
+ {
2
+ "best_global_step": 4500,
3
+ "best_metric": 0.9144344344344344,
4
+ "best_model_checkpoint": "./sentiment_model_transformer/checkpoints\\checkpoint-4500",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 4689,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.06397952655150352,
14
+ "grad_norm": 1.2034984827041626,
15
+ "learning_rate": 4.221748400852878e-06,
16
+ "loss": 0.693,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.12795905310300704,
21
+ "grad_norm": 6.451626777648926,
22
+ "learning_rate": 8.443496801705757e-06,
23
+ "loss": 0.582,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.19193857965451055,
28
+ "grad_norm": 6.7827935218811035,
29
+ "learning_rate": 1.2665245202558636e-05,
30
+ "loss": 0.3471,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.2559181062060141,
35
+ "grad_norm": 8.43331527709961,
36
+ "learning_rate": 1.6929637526652455e-05,
37
+ "loss": 0.3145,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.3198976327575176,
42
+ "grad_norm": 3.5554704666137695,
43
+ "learning_rate": 1.9867298578199055e-05,
44
+ "loss": 0.3029,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.3198976327575176,
49
+ "eval_accuracy": 0.88444,
50
+ "eval_f1": 0.8864966801555809,
51
+ "eval_loss": 0.2814505994319916,
52
+ "eval_precision": 0.8709951362618699,
53
+ "eval_recall": 0.90256,
54
+ "eval_runtime": 50.1894,
55
+ "eval_samples_per_second": 498.113,
56
+ "eval_steps_per_second": 31.142,
57
+ "step": 500
58
+ },
59
+ {
60
+ "epoch": 0.3838771593090211,
61
+ "grad_norm": 9.981550216674805,
62
+ "learning_rate": 1.9393364928909955e-05,
63
+ "loss": 0.3061,
64
+ "step": 600
65
+ },
66
+ {
67
+ "epoch": 0.44785668586052463,
68
+ "grad_norm": 12.963433265686035,
69
+ "learning_rate": 1.8919431279620855e-05,
70
+ "loss": 0.299,
71
+ "step": 700
72
+ },
73
+ {
74
+ "epoch": 0.5118362124120281,
75
+ "grad_norm": 15.598821640014648,
76
+ "learning_rate": 1.8445497630331755e-05,
77
+ "loss": 0.3067,
78
+ "step": 800
79
+ },
80
+ {
81
+ "epoch": 0.5758157389635317,
82
+ "grad_norm": 13.730338096618652,
83
+ "learning_rate": 1.7971563981042655e-05,
84
+ "loss": 0.2656,
85
+ "step": 900
86
+ },
87
+ {
88
+ "epoch": 0.6397952655150352,
89
+ "grad_norm": 6.761341094970703,
90
+ "learning_rate": 1.7497630331753558e-05,
91
+ "loss": 0.2907,
92
+ "step": 1000
93
+ },
94
+ {
95
+ "epoch": 0.6397952655150352,
96
+ "eval_accuracy": 0.89796,
97
+ "eval_f1": 0.90142586653271,
98
+ "eval_loss": 0.24962513148784637,
99
+ "eval_precision": 0.8718140369235369,
100
+ "eval_recall": 0.93312,
101
+ "eval_runtime": 59.8142,
102
+ "eval_samples_per_second": 417.961,
103
+ "eval_steps_per_second": 26.131,
104
+ "step": 1000
105
+ },
106
+ {
107
+ "epoch": 0.7037747920665387,
108
+ "grad_norm": 3.8816897869110107,
109
+ "learning_rate": 1.7023696682464458e-05,
110
+ "loss": 0.276,
111
+ "step": 1100
112
+ },
113
+ {
114
+ "epoch": 0.7677543186180422,
115
+ "grad_norm": 8.760039329528809,
116
+ "learning_rate": 1.6549763033175357e-05,
117
+ "loss": 0.2589,
118
+ "step": 1200
119
+ },
120
+ {
121
+ "epoch": 0.8317338451695457,
122
+ "grad_norm": 1.0741268396377563,
123
+ "learning_rate": 1.6075829383886257e-05,
124
+ "loss": 0.2585,
125
+ "step": 1300
126
+ },
127
+ {
128
+ "epoch": 0.8957133717210493,
129
+ "grad_norm": 5.111315727233887,
130
+ "learning_rate": 1.5601895734597157e-05,
131
+ "loss": 0.2564,
132
+ "step": 1400
133
+ },
134
+ {
135
+ "epoch": 0.9596928982725528,
136
+ "grad_norm": 12.378557205200195,
137
+ "learning_rate": 1.5127962085308059e-05,
138
+ "loss": 0.304,
139
+ "step": 1500
140
+ },
141
+ {
142
+ "epoch": 0.9596928982725528,
143
+ "eval_accuracy": 0.90884,
144
+ "eval_f1": 0.9085950346929772,
145
+ "eval_loss": 0.22750799357891083,
146
+ "eval_precision": 0.9110431915064747,
147
+ "eval_recall": 0.90616,
148
+ "eval_runtime": 47.4099,
149
+ "eval_samples_per_second": 527.317,
150
+ "eval_steps_per_second": 32.968,
151
+ "step": 1500
152
+ },
153
+ {
154
+ "epoch": 1.0236724248240563,
155
+ "grad_norm": 13.478399276733398,
156
+ "learning_rate": 1.4654028436018958e-05,
157
+ "loss": 0.19,
158
+ "step": 1600
159
+ },
160
+ {
161
+ "epoch": 1.0876519513755598,
162
+ "grad_norm": 5.589203357696533,
163
+ "learning_rate": 1.4180094786729858e-05,
164
+ "loss": 0.1837,
165
+ "step": 1700
166
+ },
167
+ {
168
+ "epoch": 1.1516314779270633,
169
+ "grad_norm": 5.220266819000244,
170
+ "learning_rate": 1.370616113744076e-05,
171
+ "loss": 0.2202,
172
+ "step": 1800
173
+ },
174
+ {
175
+ "epoch": 1.2156110044785668,
176
+ "grad_norm": 15.924393653869629,
177
+ "learning_rate": 1.323222748815166e-05,
178
+ "loss": 0.1922,
179
+ "step": 1900
180
+ },
181
+ {
182
+ "epoch": 1.2795905310300704,
183
+ "grad_norm": 3.0759739875793457,
184
+ "learning_rate": 1.2758293838862561e-05,
185
+ "loss": 0.1757,
186
+ "step": 2000
187
+ },
188
+ {
189
+ "epoch": 1.2795905310300704,
190
+ "eval_accuracy": 0.90656,
191
+ "eval_f1": 0.9086071987480439,
192
+ "eval_loss": 0.2567562162876129,
193
+ "eval_precision": 0.8891271056661562,
194
+ "eval_recall": 0.92896,
195
+ "eval_runtime": 58.5167,
196
+ "eval_samples_per_second": 427.228,
197
+ "eval_steps_per_second": 26.71,
198
+ "step": 2000
199
+ },
200
+ {
201
+ "epoch": 1.3435700575815739,
202
+ "grad_norm": 4.443939208984375,
203
+ "learning_rate": 1.228436018957346e-05,
204
+ "loss": 0.1916,
205
+ "step": 2100
206
+ },
207
+ {
208
+ "epoch": 1.4075495841330774,
209
+ "grad_norm": 11.637112617492676,
210
+ "learning_rate": 1.181042654028436e-05,
211
+ "loss": 0.1829,
212
+ "step": 2200
213
+ },
214
+ {
215
+ "epoch": 1.471529110684581,
216
+ "grad_norm": 5.834052562713623,
217
+ "learning_rate": 1.133649289099526e-05,
218
+ "loss": 0.1833,
219
+ "step": 2300
220
+ },
221
+ {
222
+ "epoch": 1.5355086372360844,
223
+ "grad_norm": 5.50422477722168,
224
+ "learning_rate": 1.086255924170616e-05,
225
+ "loss": 0.1702,
226
+ "step": 2400
227
+ },
228
+ {
229
+ "epoch": 1.599488163787588,
230
+ "grad_norm": 14.709617614746094,
231
+ "learning_rate": 1.0388625592417063e-05,
232
+ "loss": 0.189,
233
+ "step": 2500
234
+ },
235
+ {
236
+ "epoch": 1.599488163787588,
237
+ "eval_accuracy": 0.91096,
238
+ "eval_f1": 0.9126647834274952,
239
+ "eval_loss": 0.2622799873352051,
240
+ "eval_precision": 0.8955189405605174,
241
+ "eval_recall": 0.93048,
242
+ "eval_runtime": 57.0885,
243
+ "eval_samples_per_second": 437.917,
244
+ "eval_steps_per_second": 27.379,
245
+ "step": 2500
246
+ },
247
+ {
248
+ "epoch": 1.6634676903390915,
249
+ "grad_norm": 1.3613829612731934,
250
+ "learning_rate": 9.914691943127963e-06,
251
+ "loss": 0.2136,
252
+ "step": 2600
253
+ },
254
+ {
255
+ "epoch": 1.727447216890595,
256
+ "grad_norm": 5.129843235015869,
257
+ "learning_rate": 9.445497630331755e-06,
258
+ "loss": 0.1955,
259
+ "step": 2700
260
+ },
261
+ {
262
+ "epoch": 1.7914267434420985,
263
+ "grad_norm": 11.221867561340332,
264
+ "learning_rate": 8.971563981042654e-06,
265
+ "loss": 0.1888,
266
+ "step": 2800
267
+ },
268
+ {
269
+ "epoch": 1.855406269993602,
270
+ "grad_norm": 16.80147361755371,
271
+ "learning_rate": 8.497630331753554e-06,
272
+ "loss": 0.1715,
273
+ "step": 2900
274
+ },
275
+ {
276
+ "epoch": 1.9193857965451055,
277
+ "grad_norm": 1.4080605506896973,
278
+ "learning_rate": 8.023696682464456e-06,
279
+ "loss": 0.159,
280
+ "step": 3000
281
+ },
282
+ {
283
+ "epoch": 1.9193857965451055,
284
+ "eval_accuracy": 0.91048,
285
+ "eval_f1": 0.9090982940698619,
286
+ "eval_loss": 0.2924627661705017,
287
+ "eval_precision": 0.9233498349834983,
288
+ "eval_recall": 0.89528,
289
+ "eval_runtime": 57.9115,
290
+ "eval_samples_per_second": 431.693,
291
+ "eval_steps_per_second": 26.989,
292
+ "step": 3000
293
+ },
294
+ {
295
+ "epoch": 1.983365323096609,
296
+ "grad_norm": 10.077709197998047,
297
+ "learning_rate": 7.554502369668247e-06,
298
+ "loss": 0.1986,
299
+ "step": 3100
300
+ },
301
+ {
302
+ "epoch": 2.0473448496481126,
303
+ "grad_norm": 0.11581742018461227,
304
+ "learning_rate": 7.080568720379148e-06,
305
+ "loss": 0.1298,
306
+ "step": 3200
307
+ },
308
+ {
309
+ "epoch": 2.111324376199616,
310
+ "grad_norm": 0.21151360869407654,
311
+ "learning_rate": 6.606635071090048e-06,
312
+ "loss": 0.08,
313
+ "step": 3300
314
+ },
315
+ {
316
+ "epoch": 2.1753039027511196,
317
+ "grad_norm": 0.4802148938179016,
318
+ "learning_rate": 6.132701421800948e-06,
319
+ "loss": 0.1072,
320
+ "step": 3400
321
+ },
322
+ {
323
+ "epoch": 2.239283429302623,
324
+ "grad_norm": 21.213685989379883,
325
+ "learning_rate": 5.658767772511849e-06,
326
+ "loss": 0.1465,
327
+ "step": 3500
328
+ },
329
+ {
330
+ "epoch": 2.239283429302623,
331
+ "eval_accuracy": 0.91056,
332
+ "eval_f1": 0.9118226989510214,
333
+ "eval_loss": 0.3255835473537445,
334
+ "eval_precision": 0.8991289469590916,
335
+ "eval_recall": 0.92488,
336
+ "eval_runtime": 58.5259,
337
+ "eval_samples_per_second": 427.161,
338
+ "eval_steps_per_second": 26.706,
339
+ "step": 3500
340
+ },
341
+ {
342
+ "epoch": 2.3032629558541267,
343
+ "grad_norm": 2.2766849994659424,
344
+ "learning_rate": 5.1848341232227494e-06,
345
+ "loss": 0.1111,
346
+ "step": 3600
347
+ },
348
+ {
349
+ "epoch": 2.36724248240563,
350
+ "grad_norm": 2.196903944015503,
351
+ "learning_rate": 4.710900473933649e-06,
352
+ "loss": 0.0955,
353
+ "step": 3700
354
+ },
355
+ {
356
+ "epoch": 2.4312220089571337,
357
+ "grad_norm": 24.438621520996094,
358
+ "learning_rate": 4.23696682464455e-06,
359
+ "loss": 0.1145,
360
+ "step": 3800
361
+ },
362
+ {
363
+ "epoch": 2.495201535508637,
364
+ "grad_norm": 0.4384997487068176,
365
+ "learning_rate": 3.7630331753554506e-06,
366
+ "loss": 0.1342,
367
+ "step": 3900
368
+ },
369
+ {
370
+ "epoch": 2.5591810620601407,
371
+ "grad_norm": 7.266367435455322,
372
+ "learning_rate": 3.2890995260663512e-06,
373
+ "loss": 0.1028,
374
+ "step": 4000
375
+ },
376
+ {
377
+ "epoch": 2.5591810620601407,
378
+ "eval_accuracy": 0.91268,
379
+ "eval_f1": 0.9119225337905992,
380
+ "eval_loss": 0.31132665276527405,
381
+ "eval_precision": 0.9199023199023199,
382
+ "eval_recall": 0.90408,
383
+ "eval_runtime": 57.7956,
384
+ "eval_samples_per_second": 432.559,
385
+ "eval_steps_per_second": 27.044,
386
+ "step": 4000
387
+ },
388
+ {
389
+ "epoch": 2.6231605886116443,
390
+ "grad_norm": 0.180605947971344,
391
+ "learning_rate": 2.8151658767772515e-06,
392
+ "loss": 0.1003,
393
+ "step": 4100
394
+ },
395
+ {
396
+ "epoch": 2.6871401151631478,
397
+ "grad_norm": 7.690706729888916,
398
+ "learning_rate": 2.3412322274881517e-06,
399
+ "loss": 0.1095,
400
+ "step": 4200
401
+ },
402
+ {
403
+ "epoch": 2.7511196417146513,
404
+ "grad_norm": 5.688648700714111,
405
+ "learning_rate": 1.8672985781990523e-06,
406
+ "loss": 0.0973,
407
+ "step": 4300
408
+ },
409
+ {
410
+ "epoch": 2.815099168266155,
411
+ "grad_norm": 0.07725273072719574,
412
+ "learning_rate": 1.3933649289099526e-06,
413
+ "loss": 0.0937,
414
+ "step": 4400
415
+ },
416
+ {
417
+ "epoch": 2.8790786948176583,
418
+ "grad_norm": 0.1056542843580246,
419
+ "learning_rate": 9.194312796208532e-07,
420
+ "loss": 0.1014,
421
+ "step": 4500
422
+ },
423
+ {
424
+ "epoch": 2.8790786948176583,
425
+ "eval_accuracy": 0.91452,
426
+ "eval_f1": 0.9144344344344344,
427
+ "eval_loss": 0.3255852460861206,
428
+ "eval_precision": 0.9153507014028056,
429
+ "eval_recall": 0.91352,
430
+ "eval_runtime": 58.1033,
431
+ "eval_samples_per_second": 430.268,
432
+ "eval_steps_per_second": 26.9,
433
+ "step": 4500
434
+ },
435
+ {
436
+ "epoch": 2.943058221369162,
437
+ "grad_norm": 1.3683459758758545,
438
+ "learning_rate": 4.4549763033175363e-07,
439
+ "loss": 0.1032,
440
+ "step": 4600
441
+ }
442
+ ],
443
+ "logging_steps": 100,
444
+ "max_steps": 4689,
445
+ "num_input_tokens_seen": 0,
446
+ "num_train_epochs": 3,
447
+ "save_steps": 500,
448
+ "stateful_callbacks": {
449
+ "TrainerControl": {
450
+ "args": {
451
+ "should_epoch_stop": false,
452
+ "should_evaluate": false,
453
+ "should_log": false,
454
+ "should_save": true,
455
+ "should_training_stop": true
456
+ },
457
+ "attributes": {}
458
+ }
459
+ },
460
+ "total_flos": 4967527449600000.0,
461
+ "train_batch_size": 16,
462
+ "trial_name": null,
463
+ "trial_params": null
464
+ }
checkpoints/checkpoint-4689/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c927d2cb16dc987623e74b58eaecf59b19812c28268af85b7198d40b318a1776
+ size 5432
checkpoints/checkpoint-4689/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "activation": "gelu",
+   "architectures": [
+     "DistilBertForSequenceClassification"
+   ],
+   "attention_dropout": 0.1,
+   "dim": 768,
+   "dropout": 0.1,
+   "dtype": "float32",
+   "hidden_dim": 3072,
+   "initializer_range": 0.02,
+   "max_position_embeddings": 512,
+   "model_type": "distilbert",
+   "n_heads": 12,
+   "n_layers": 6,
+   "pad_token_id": 0,
+   "problem_type": "single_label_classification",
+   "qa_dropout": 0.1,
+   "seq_classif_dropout": 0.2,
+   "sinusoidal_pos_embds": false,
+   "tie_weights_": true,
+   "transformers_version": "4.57.1",
+   "vocab_size": 30522
+ }
logs/events.out.tfevents.1763396606.Pierruno.82048.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:39e87e4f41d7162016580efc7f422161e365100da1aabbf4ac0076e8f02bdc57
+ size 14610
logs/events.out.tfevents.1763397273.Pierruno.82048.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cba1329b2d62b1a8b42ae1b04f012e99377a488266851e62ae59d2d4350bf8ff
+ size 560
logs/events.out.tfevents.1763397463.Pierruno.78916.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:371f32d43819eb8a8f78fe673491637b4fc325bc8bc65ea3a367a72ef0acc83d
+ size 19191
logs/events.out.tfevents.1763398281.Pierruno.78916.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:620907bfea8efd0bb7b38bc214bacc83443342d2e3a201cbab628a23520f1a56
+ size 560
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b04b3e3b130814f7e7803af2368c5f5fbe982ce36c08b473f16acc665256bfa
+ size 267832560
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
test_results.json ADDED
@@ -0,0 +1,11 @@
+ {
+   "eval_loss": 0.3255852460861206,
+   "eval_accuracy": 0.91452,
+   "eval_f1": 0.9144344344344344,
+   "eval_precision": 0.9153507014028056,
+   "eval_recall": 0.91352,
+   "eval_runtime": 56.7332,
+   "eval_samples_per_second": 440.659,
+   "eval_steps_per_second": 27.55,
+   "epoch": 3.0
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "extra_special_tokens": {},
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "DistilBertTokenizer",
+   "unk_token": "[UNK]"
+ }
training_args.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "model_name": "distilbert-base-uncased",
+   "max_length": 256,
+   "batch_size": 16,
+   "epochs": 3,
+   "learning_rate": 2e-05,
+   "warmup_ratio": 0.1,
+   "weight_decay": 0.01,
+   "gradient_accumulation_steps": 1,
+   "max_train_samples": 25000,
+   "max_eval_samples": null,
+   "output_dir": "./sentiment_model_transformer",
+   "seed": 42,
+   "save_steps": 500,
+   "eval_steps": 500,
+   "logging_steps": 100,
+   "hf_username": "PierrunoYT",
+   "repo_name": "sentiment-imdb-distilbert",
+   "no_cuda": false,
+   "fp16": false
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff