alok-essential committed on
Commit
f9af95c
·
1 Parent(s): 934f618

initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model-00001-of-00007.safetensors filter=lfs diff=lfs merge=lfs -text
37
+ model-00002-of-00007.safetensors filter=lfs diff=lfs merge=lfs -text
38
+ model-00003-of-00007.safetensors filter=lfs diff=lfs merge=lfs -text
39
+ model-00004-of-00007.safetensors filter=lfs diff=lfs merge=lfs -text
40
+ model-00005-of-00007.safetensors filter=lfs diff=lfs merge=lfs -text
41
+ model-00006-of-00007.safetensors filter=lfs diff=lfs merge=lfs -text
42
+ model-00007-of-00007.safetensors filter=lfs diff=lfs merge=lfs -text
43
+ tokenizer filter=lfs diff=lfs merge=lfs -text
44
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2025 Essential AI
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+ # Rnj-1
5
+
6
+ <p align="center">
7
+ <img src="https://raw.githubusercontent.com/Essential-AI/rnj-1-assets/refs/heads/main/assets/Essential%20Logo%20Color_Color_With%20Space.jpg" width="60%" alt="EssentialAI">
8
+ </p>
9
+
10
+ <div align="center" style="line-height: 1;">
11
+
12
+ <!-- Website -->
13
+ <a href="https://essential.ai">
14
+ <img alt="Homepage"
15
+ src="https://img.shields.io/badge/%F0%9F%8C%90%20Website-essential.ai-4b9fe1?color=4b9fe1&logoColor=white"/>
16
+ </a>
17
+
18
+ <!-- Blog / Research -->
19
+ <a href="https://www.essential.ai/research/rnj-1">
20
+ <img alt="Research Blog"
21
+ src="https://img.shields.io/badge/🧠%20Research-rnj--1-7c5cff?color=7c5cff&logoColor=white"/>
22
+ </a>
23
+
24
+ <!-- HuggingFace -->
25
+ <a href="https://huggingface.co/collections/EssentialAI/rnj-1">
26
+ <img alt="Hugging Face"
27
+ src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-rnj--1-ffc107?color=ffc107&logoColor=white"/>
28
+ </a>
29
+
30
+ <br>
31
+
32
+ <!-- Discord -->
33
+ <a href="https://discord.gg/VPEqUNg6tR">
34
+ <img alt="Discord"
35
+ src="https://img.shields.io/badge/Discord-Essential%20AI-7289da?logo=discord&logoColor=white&color=7289da"/>
36
+ </a>
37
+
38
+ <!-- X / Twitter -->
39
+ <a href="https://x.com/essential_ai">
40
+ <img alt="Twitter Follow"
41
+ src="https://img.shields.io/badge/Twitter-essential__ai-white?logo=x&logoColor=white"/>
42
+ </a>
43
+
44
+ <!-- Together AI -->
45
+ <a href="https://api.together.ai/models/essentialai/rnj-1-instruct">
46
+ <img alt="Together AI"
47
+ src="https://img.shields.io/badge/⚡%20TogetherAI-rnj--1--instruct-00c2a8?color=00c2a8&logoColor=white"/>
48
+ </a>
49
+
50
+ <br>
51
+ </div>
52
+
53
+ Rnj-1 is a family of 8B parameter open-weight, dense models trained from scratch by Essential AI, optimized for code and STEM with capabilities on par with SOTA open-weight models. These models perform well across a range of programming languages and boast strong agentic capabilities (e.g., inside agentic frameworks like mini-SWE-agent), while also excelling at tool-calling. They additionally exhibit strong capabilities in math and science. Herein, `rnj-1` refers to the base model, while `rnj-1-instruct` refers to the post-trained instruction tuned model.
54
+
55
+ # Capabilities
56
+
57
+ We evaluate Rnj-1 models against models of comparable size. In addition to accuracy, we also show the FLOPs used in pre-training for each model.
58
+
59
+ ### Benchmark Results
60
+
61
+ ### Base Model `rnj-1`
62
+
63
+ <p align="center">
64
+ <img src="https://raw.githubusercontent.com/Essential-AI/rnj-1-assets/refs/heads/main/assets/Base_Full_Table.png" width="100%" alt="Base Evals"/>
65
+ </p>
66
+
67
+ ### Instruct Model `rnj-1-instruct`
68
+
69
+ `rnj-1-instruct` is strong at code, math, and STEM tasks. It also performs well within agentic frameworks such as mini-swe-agent and has stellar tool use abilities.
70
+
71
+ <p align="center">
72
+ <img src="https://raw.githubusercontent.com/Essential-AI/rnj-1-assets/refs/heads/main/assets/Instruct_Full_Table.png" width="100%" alt="Instruct Evals"/>
73
+
74
+ <sub><i>We report published numbers when possible, and when unavailable they are internal reproductions.
75
+ Pre-training FLOPs were estimated using 6nt, where n is the number of parameters and t is the token budget.
76
+ All Evals under the Env bucket were evaluated using mini-swe-agent (bash only) scaffolding.
77
+ GPT OSS 20B was evaluated with reasoning_effort=low.
78
+ Qwen 3 8B was evaluated with thinking turned off.</i></sub></p>
79
+
80
+ ### Rnj-1 models are designed to be extended
81
+
82
+ Both `rnj-1` and `rnj-1-instruct` models are being made available for the community to extend and build upon. We deliberately kept post-training limited to allow for further specialization by the community. As an indicator of the untapped potential of the models we report `pass@{1,2,4,8}` (with T=0.2, n=8 generations) for hard codegen, agentic, and math benchmarks on `rnj-1-instruct`. These illustrate the model’s potential for test-time scaling and for further domain-specialization. The base model is similarly capable of specialization to other domains different from our post-training if needed.
83
+
84
+ <p align="center">
85
+ <img src="https://raw.githubusercontent.com/Essential-AI/rnj-1-assets/refs/heads/main/assets/rnj-1-pass-at-k.png" width="80%" alt="Pass at k evals"/>
86
+ </p>
87
+
88
+ Sidenote: Here is a [screen recording](https://vimeo.com/1143712958/c66dda13f3?share=copy&fl=sv&fe=ci) of `rnj-1-instruct` helping us make an early version of this chart.
89
+
90
+ ### Highlights of abilities
91
+
92
+ - **Code generation:** Both `rnj-1-instruct` and `rnj-1` demonstrate strong code generation abilities as measured on tasks like HumanEval+, MBPP+, BigCodeBench, and LiveCodeBench v6. Both models compete with the strongest open weight models, sometimes outperforming even larger models such as GPT OSS 20B. We measured code comprehension abilities using the task of predicting inputs given outputs and vice-versa, Crux-IO. We find our models outperform comparable baselines. For multi-lingual code generation capabilities across programming languages we measure MultiPL-E on 6 languages (C++, TypeScript, Java, JavaScript, Shell, PHP) and we find performance close to the strongest model.
93
+ - **Agentic and Tool Use:** `rnj-1-instruct` dominates the pack on agentic coding, one of our target abilities. SWE-bench performance is indicative of the model’s ability to tackle everyday software engineering tasks. The model is an order of magnitude stronger than comparably sized models on SWE-bench and approaches the capabilities available in much larger models. It scores `20.8%` on SWE-bench Verified in bash-only mode, which is higher than Gemini 2.0 flash and Qwen2.5-Coder 32B Instruct under the same agentic framework ([leaderboard](https://www.swebench.com/bash-only.html)).<br><br>
94
+ There is a surge of interest in developing models’ abilities to write performant code. `rnj-1-instruct` is able to use a profiler to iteratively improve the performance of the code it writes. For instance, on [Enamel](https://github.com/q-rz/enamel/tree/main), which measures abilities to write efficient solutions to algorithmic problems, the model outperforms all other models under the same setting.<br><br>
95
+ Furthermore, `rnj-1-instruct` surpasses comparable models in tool use performance as measured by the Berkeley Function Calling Leaderboard (BFCL).
96
+ - **Code Infilling:** Having specifically been trained on FIM-ed pre-training data, `rnj-1` exhibits strong infilling abilities, which have been further enhanced during post-training. The base model `rnj-1` scores highly on HE-FIM-Python (avg) at 82.49% and `rnj-1-instruct` achieves 86.21%.
97
+ - **Mathematical Problem Solving:** `rnj-1-instruct` shows strong mathematical abilities across several levels of difficulty from elementary math (GSM8k), high school and undergraduate math (Minerva-MATH), and competition math (AIME ’24 and ’25). On harder subjects, it outcompetes or is on par with the strongest model in the pack.
98
+ - **Scientific Reasoning:** `rnj-1-instruct` exhibits long-context reasoning abilities that are needed to solve hard science and technical questions in GPQA-Diamond and SuperGPQA.
99
+
100
+ ### Demos: Rnj-1 models generalize to unseen tasks
101
+
102
+ We show a few examples of end-to-end capabilities that are usually expected of larger models.
103
+
104
+ - **Coding assistant:** `rnj-1-instruct` can operate in agentic mode to create a playable game in a single shot inside of Cline: [screen recording](https://vimeo.com/1143853378/8df3376a1a?share=copy&fl=sv&fe=ci).
105
+ - **Agentic use:** `rnj-1-instruct` functions seamlessly within the agentic framework of mini-swe-agent. Given a task such as fixing an issue described in a pull request (PR), fixing a security vulnerability, or writing performant code, it is able to reason across its full context across multiple turns to solve the task. These lead to “trajectories” which are pairs of “Assistant” and “User” turns. Here are a few recordings that show the model’s reasoning abilities across these turns: 1) a SWE task of identifying coding convention violation: [screen recording](https://vimeo.com/1143841317/44adfbd044?share=copy&fl=sv&fe=ci), 2) fixing a security vulnerability: [screen recording](https://vimeo.com/1143843598/6fca2fe0bb?share=copy&fl=sv&fe=ci), 3) diagnosing code performance bottlenecks by running a profiler in the environment and iteratively improving the code: [screen recording](https://vimeo.com/1143828123/11e4d22ac7?share=copy&fl=sv&fe=ci).
106
+ - **Data analysis in an interactive chat:** `rnj-1-instruct` can work in interactive chat mode to solve a data analysis and visualization task: [screen recording](https://vimeo.com/1143831950/0e7d9c3edc?share=copy&fl=sv&fe=ci).
107
+
108
+ # Architecture
109
+
110
+ Rnj-1's architecture is similar to Gemma 3, except that it uses only global attention, and YaRN for long-context extension.
111
+
112
+ | Hyperparameter | Value |
113
+ |:---:|:---:|
114
+ | **Total Parameters** | 8.3B |
115
+ | **Number of Layers** | 32 |
116
+ | **Model Dimension** | 4096 |
117
+ | **MLP Dimension** | 16384 |
118
+ | **Number of Attention Heads** | 32 |
119
+ | **Number of Key-Value Heads** | 8 |
120
+ | **Attention Head Dimension** | 128 |
121
+ | **Vocabulary Size** | 128K |
122
+ | **Pretrain Context Length** | 8K |
123
+ | **Context Length** | 32K |
124
+ | **Activation Function** | GeGLU |
125
+ | **Tied Embeddings?** | Yes |
126
+
127
+ ### Training Dynamics
128
+
129
+ `rnj-1` was pre-trained on 8.4T tokens with an 8K context length, after which the model’s context window was extended to **32K** through an additional 380B-token mid-training stage. A final 150B-token SFT stage completed the training to produce `rnj-1-instruct`.
130
+
131
+ We used the Muon optimizer throughout all phases. Pre-training followed the WSD learning-rate schedule, consisting of:
132
+
133
+ - Warmup: Linear ramp-up from 0 to 2e-3 over the first 5K steps.
134
+ - Stable phase: Constant learning rate of 2e-3 from 5K → 230K steps.
135
+ - Decay: Cosine decay from 2e-3 → 2e-5 from 230K → 380K steps.
136
+ - Final stable phase: Constant 2e-5 learning rate from 380K → 443.5K steps, concluding pre-training.
137
+
138
+ Both the mid-training (context-extension phase) and SFT were trained at a fixed learning rate of 2e-5.
139
+
140
+ The global batch sizes used were:
141
+
142
+ - 18M tokens for pre-training.
143
+ - 24M tokens for mid-training.
144
+ - 16M tokens for SFT.
145
+
146
+ # Recommendations
147
+
148
+ ### Temperature
149
+
150
+ We recommend using temperatures in the range [0, 0.6] for `rnj-1-instruct`.
151
+
152
+ ### Propensity to write code
153
+
154
+ Rnj-1 models have a strong inclination to write code, even for non-code tasks. This is especially true for `rnj-1-instruct` if the system prompt is omitted. Provide an appropriate system prompt, e.g., “You are a helpful assistant”, along with global task needs to steer the model’s responses in the desired direction.
155
+
156
+ # How to use
157
+
158
+ ## Serverless API and online playgrounds
159
+
160
+ - Together.AI: Rnj-1 Instruct is available via API on the [Together.ai](http://Together.ai) model platform for serverless inference. It’s also available in the Together.ai playground for quick and easy experimentation.
161
+ - HuggingFace: Rnj-1 Instruct is also hosted via [Hugging Face Spaces](https://huggingface.co/spaces/EssentialAI/rnj-1-instruct-space).
162
+
163
+ ## Running Rnj-1 locally
164
+
165
+ ### Running Rnj-1 on your laptop with llama.cpp
166
+
167
+ The easiest way to run Rnj-1 on a laptop is via [llama.cpp](https://github.com/ggml-org/llama.cpp). A pre-quantized checkpoint is available [here](https://huggingface.co/EssentialAI/rnj-1-instruct-GGUF) as well as instructions to get started.
168
+
169
+ ### Use with transformers
170
+
171
+ Rnj-1 is supported starting from transformers `4.51.2`
172
+
173
+ 1. Example code for querying model without tools
174
+
175
+ ```python
176
+ import torch
177
+ from transformers import AutoTokenizer, AutoModelForCausalLM
178
+ import os
179
+
180
+ model_id = "EssentialAI/rnj-1-instruct"
181
+ os.environ["HF_TOKEN"] = "<YOUR-HF-TOKEN>"  # replace with your Hugging Face access token
182
+
183
+ print(f"Loading model: {model_id}...")
184
+ model = AutoModelForCausalLM.from_pretrained(
185
+ model_id,
186
+ dtype=torch.bfloat16,
187
+ device_map="auto",
188
+ )
189
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
190
+
191
+ print("Model and tokenizer loaded successfully.")
192
+
193
+ messages = [
194
+ {"role": "system", "content": "You are a helpful AI assistant."}, # Optional system message
195
+ {"role": "user", "content": "Who are you?"}
196
+ ]
197
+
198
+ input_ids = tokenizer.apply_chat_template(
199
+ messages,
200
+ add_generation_prompt=True,
201
+ return_tensors="pt"
202
+ ).to(model.device)
203
+
204
+ # --- Generate Prediction --- #
205
+ print("Generating prediction...")
206
+ output_ids = model.generate(
207
+ input_ids,
208
+ max_new_tokens=50,
209
+ pad_token_id=tokenizer.eos_token_id,
210
+ do_sample=True,
211
+ temperature=0.2,
212
+ top_p=0.95
213
+ )
214
+
215
+ response = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
216
+ print(response)
217
+ ```
218
+
219
+
220
+ 2. Example code for querying with tools
221
+
222
+ Rnj-1 supports tool-calling which can be parsed by `hermes` tool-call parser. The tool calls are formatted inside `<tool_call>` and `</tool_call>` tags.
223
+ An example usage is as follows:
224
+
225
+ ```python
226
+ tools = [
227
+ {
228
+ "type": "function",
229
+ "function": {
230
+ "name": "get_weather",
231
+ "description": "Get the current weather in a given location",
232
+ "parameters": {
233
+ "type": "object",
234
+ "properties": {
235
+ "location": {"type": "string", "description": "City and state, e.g., 'San Francisco, CA'"},
236
+ "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
237
+ },
238
+ "required": ["location", "unit"],
239
+ },
240
+ },
241
+ },
242
+ ]
243
+
244
+ messages = [
245
+ {"role": "system", "content": "You are a helpful AI assistant."}, # Optional system message
246
+ {"role": "user", "content": "What is the weather in San Francisco, CA in Celsius?"}
247
+ ]
248
+
249
+ input_ids = tokenizer.apply_chat_template(
250
+ messages,
251
+ tools=tools,
252
+ add_generation_prompt=True,
253
+ return_tensors="pt"
254
+ ).to(model.device)
255
+
256
+ # --- Generate Prediction --- #
257
+ print("Generating prediction...")
258
+ output_ids = model.generate(
259
+ input_ids,
260
+ max_new_tokens=200,
261
+ pad_token_id=tokenizer.eos_token_id,
262
+ do_sample=True,
263
+ temperature=0.2,
264
+ top_p=0.95
265
+ )
266
+
267
+ response = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=False)
268
+ # NOTE: skip_special_tokens is set to False.
269
+ print(response)
270
+ ```
271
+
272
+
273
+ 3. Example code for fill-in-the-middle (FIM)
274
+
275
+ Rnj-1 supports FIM. An example payload that triggers FIM mode is shown below:
276
+
277
+ ```python
278
+ PRE = "<|pre_fim|>"
279
+ MID = "<|mid_fim|>"
280
+ SUF = "<|suf_fim|>"
281
+
282
+ prefix = """def binary_search(arr, target):
283
+ lo = 0
284
+ hi = len(arr) - 1
285
+
286
+ while lo <= hi:
287
+ """
288
+
289
+ suffix = """
290
+ return -1
291
+ """
292
+
293
+ input = PRE + prefix + SUF + suffix + MID
294
+
295
+ messages = [
296
+ {"role": "system", "content": "You are a helpful AI assistant."},
297
+ {"role": "user", "content": input}
298
+ ]
299
+
300
+ input_ids = tokenizer.apply_chat_template(
301
+ messages,
302
+ tools=tools,
303
+ add_generation_prompt=True,
304
+ return_tensors="pt"
305
+ ).to(model.device)
306
+
307
+ # --- Generate Prediction --- #
308
+ print("Generating prediction...")
309
+ output_ids = model.generate(
310
+ input_ids,
311
+ max_new_tokens=100,
312
+ pad_token_id=tokenizer.eos_token_id,
313
+ do_sample=True,
314
+ temperature=0.2,
315
+ top_p=0.95
316
+ )
317
+
318
+ response = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=False)
319
+ print(response)
320
+ ```
321
+
322
+
323
+ ### Serving Rnj-1 on GPUs
324
+
325
+ ### **vLLM**
326
+
327
+ On machines that run vLLM, it’s as easy as:
328
+
329
+ ```bash
330
+ vllm serve EssentialAI/rnj-1-instruct
331
+ ```
332
+
333
+ To launch a vLLM server with tool-calling support enabled:
334
+
335
+ ```bash
336
+ vllm serve EssentialAI/rnj-1-instruct --enable-auto-tool-choice --tool-call-parser hermes
337
+ ```
338
+
339
+ ### SGLang
340
+
341
+ On machines that run SGLang, it’s as easy as:
342
+
343
+ ```bash
344
+ python3 -m sglang.launch_server --model EssentialAI/rnj-1-instruct
345
+ ```
346
+
347
+ ## IDEs and Agents: Claude Code, Cline, Mini-SWE-Agent
348
+
349
+ ### Use with Cline
350
+
351
+ Rnj-1 works great with Cline, an open source AI coding agent, and is very easy to set up.
352
+
353
+ The Cline extension is available for VS Code / Cursor, JetBrains IDEs (IntelliJ, PyCharm, WebStorm, etc.) and VSCodium / Windsurf.
354
+
355
+ Simply add the Cline extension to your favorite IDE (see instructions [here](https://docs.cline.bot/getting-started/installing-cline)) and then enter the details for your Rnj-1 endpoint (instructions [here](https://docs.cline.bot/getting-started/selecting-your-model)).
356
+
357
+ ### Use with Claude Code
358
+
359
+ To use Rnj-1 with Claude Code, you can use https://github.com/musistudio/claude-code-router. Follow the instructions to set up Claude Code and Claude Code Router at https://github.com/musistudio/claude-code-router/blob/main/README.md.
360
+
361
+ ### Agentic mode with Mini-SWE-Agent
362
+
363
+ Clone the EssentialAI fork of mini-swe-agent ([github](https://github.com/Essential-AI/eai-mini-swe-agent#)). Inside the repo, run the following inside a `virtualenv`:
364
+
365
+ ```bash
366
+ git checkout eai
367
+ pip install -e .
368
+ export TOGETHER_API_KEY="..." # set this to your Together.AI access key
369
+
370
+ # use EssentialAI/rnj-1-instruct to solve a performance optimization task
371
+ mini-extra perf-single [--instance <k>]
372
+ # use EssentialAI/rnj-1-instruct to resolve a SWE PR description
373
+ mini-extra swebench-single [--instance <k>]
374
+ ```
375
+
376
+ # Known limitations
377
+
378
+ ### Hallucinations and factual inaccuracies
379
+
380
+ Rnj-1 is primarily a coding and STEM model. Hence, it is not optimized for factual recall.
381
+
382
+ ### Identity and knowledge cutoff
383
+
384
+ Rnj-1 is trained on online web data, and we have observed that it sometimes confuses its identity with other model providers. We believe this is due to a variety of reasons, including references to language models from other providers, model generated data, etc. We hope to rectify this in our follow-up release.
385
+
386
+ Additionally, Rnj-1 has not been trained or provided with a knowledge cutoff date and may therefore respond with information coming from its training data. If specifically asked for its knowledge cutoff date, the model may hallucinate a date.
387
+
388
+ # **License**
389
+
390
+ This repository and the model weights are licensed under [**the Apache License, Version 2.0 (Apache 2.0)**](https://huggingface.co/EssentialAI/rnj-1-instruct/blob/main/LICENSE).
391
+
392
+ # **Contact**
393
+
394
+ We welcome your questions and feedback. You can contact us at info@essential.ai.
config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "attn_logit_softcapping": null,
8
+ "bos_token_id": 2,
9
+ "cache_implementation": "hybrid",
10
+ "eos_token_id": 1,
11
+ "final_logit_softcapping": 30.0,
12
+ "head_dim": 128,
13
+ "hidden_act": "gelu_pytorch_tanh",
14
+ "hidden_activation": "gelu_pytorch_tanh",
15
+ "hidden_size": 4096,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 16384,
18
+ "layer_type": [
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention"
51
+ ],
52
+ "max_position_embeddings": 32768,
53
+ "model_type": "gemma3_text",
54
+ "num_attention_heads": 32,
55
+ "num_hidden_layers": 32,
56
+ "num_key_value_heads": 8,
57
+ "pad_token_id": 0,
58
+ "query_pre_attn_scalar": 128,
59
+ "rms_norm_eps": 1e-06,
60
+ "rope_local_base_freq": 10000,
61
+ "rope_scaling": {
62
+ "attn_factor": 1.0,
63
+ "beta_fast": 64.0,
64
+ "beta_slow": 1.0,
65
+ "extrapolation_factor": 1.0,
66
+ "factor": 4.0,
67
+ "original_max_position_embeddings": 8192,
68
+ "rope_type": "yarn"
69
+ },
70
+ "rope_theta": 10000,
71
+ "sliding_window": 32768,
72
+ "sliding_window_pattern": 1,
73
+ "torch_dtype": "float32",
74
+ "transformers_version": "4.51.2",
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
+ "cache_implementation": "hybrid",
5
+ "eos_token_id": 128009,
6
+ "pad_token_id": 128001,
7
+ "temperature": 0.2,
8
+ "do_sample": true,
9
+ "transformers_version": "4.51.2"
10
+ }
model-00001-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:922f2ff49c70cbd9076dce439e1d437a584adeecc4e6fb1bf04295f45e07ded9
3
+ size 4752284616
model-00002-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53ef66dad0c2af80c1ae5183ca2b19c76c7a32888def60d25d020c8be10a72a1
3
+ size 4865732920
model-00003-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3594811dc502a36767199011783d2eaeaa4f3cd8889eae3c4656ee35b2a6fb64
3
+ size 4865732944
model-00004-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16204bd67eb19e7d9a2d55dfa0d56eb7eba9be578ccf1af1a7704c43a55eb626
3
+ size 4865732984
model-00005-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6454457f918acb5cf79b4d4b2b8967424bca06d62742c16c84606a6d427f4d05
3
+ size 4865732984
model-00006-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:980a9628d4e7f405b72eb1e191384bd468e3f4b5199adcfd2c7dbe9a4b38761a
3
+ size 4865732984
model-00007-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb4270d5e0a3da92426b95fc79a16a3959b8468995a48af9989cfff2d76f506d
3
+ size 4161104448
model.safetensors.index.json ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 33242005504
4
+ },
5
+ "weight_map": {
6
+ "model.embed_tokens.weight": "model-00001-of-00007.safetensors",
7
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors",
8
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
9
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
10
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
11
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
12
+ "model.layers.0.post_feedforward_layernorm.weight": "model-00001-of-00007.safetensors",
13
+ "model.layers.0.pre_feedforward_layernorm.weight": "model-00001-of-00007.safetensors",
14
+ "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00007.safetensors",
15
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
16
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
17
+ "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00007.safetensors",
18
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
19
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
20
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors",
21
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
22
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
23
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
24
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
25
+ "model.layers.1.post_feedforward_layernorm.weight": "model-00001-of-00007.safetensors",
26
+ "model.layers.1.pre_feedforward_layernorm.weight": "model-00001-of-00007.safetensors",
27
+ "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00007.safetensors",
28
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
29
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
30
+ "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00007.safetensors",
31
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
32
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
33
+ "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors",
34
+ "model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
35
+ "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
36
+ "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
37
+ "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
38
+ "model.layers.10.post_feedforward_layernorm.weight": "model-00003-of-00007.safetensors",
39
+ "model.layers.10.pre_feedforward_layernorm.weight": "model-00003-of-00007.safetensors",
40
+ "model.layers.10.self_attn.k_norm.weight": "model-00003-of-00007.safetensors",
41
+ "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
42
+ "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
43
+ "model.layers.10.self_attn.q_norm.weight": "model-00003-of-00007.safetensors",
44
+ "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
45
+ "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
46
+ "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors",
47
+ "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
48
+ "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
49
+ "model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
50
+ "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
51
+ "model.layers.11.post_feedforward_layernorm.weight": "model-00003-of-00007.safetensors",
52
+ "model.layers.11.pre_feedforward_layernorm.weight": "model-00003-of-00007.safetensors",
53
+ "model.layers.11.self_attn.k_norm.weight": "model-00003-of-00007.safetensors",
54
+ "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
55
+ "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
56
+ "model.layers.11.self_attn.q_norm.weight": "model-00003-of-00007.safetensors",
57
+ "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
58
+ "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
59
+ "model.layers.12.input_layernorm.weight": "model-00004-of-00007.safetensors",
60
+ "model.layers.12.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
61
+ "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
62
+ "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
63
+ "model.layers.12.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
64
+ "model.layers.12.post_feedforward_layernorm.weight": "model-00004-of-00007.safetensors",
65
+ "model.layers.12.pre_feedforward_layernorm.weight": "model-00004-of-00007.safetensors",
66
+ "model.layers.12.self_attn.k_norm.weight": "model-00003-of-00007.safetensors",
67
+ "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
68
+ "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
69
+ "model.layers.12.self_attn.q_norm.weight": "model-00003-of-00007.safetensors",
70
+ "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
71
+ "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
72
+ "model.layers.13.input_layernorm.weight": "model-00004-of-00007.safetensors",
73
+ "model.layers.13.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
74
+ "model.layers.13.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
75
+ "model.layers.13.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
76
+ "model.layers.13.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
77
+ "model.layers.13.post_feedforward_layernorm.weight": "model-00004-of-00007.safetensors",
78
+ "model.layers.13.pre_feedforward_layernorm.weight": "model-00004-of-00007.safetensors",
79
+ "model.layers.13.self_attn.k_norm.weight": "model-00004-of-00007.safetensors",
80
+ "model.layers.13.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
81
+ "model.layers.13.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
82
+ "model.layers.13.self_attn.q_norm.weight": "model-00004-of-00007.safetensors",
83
+ "model.layers.13.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
84
+ "model.layers.13.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
85
+ "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors",
86
+ "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
87
+ "model.layers.14.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
88
+ "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
89
+ "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
90
+ "model.layers.14.post_feedforward_layernorm.weight": "model-00004-of-00007.safetensors",
91
+ "model.layers.14.pre_feedforward_layernorm.weight": "model-00004-of-00007.safetensors",
92
+ "model.layers.14.self_attn.k_norm.weight": "model-00004-of-00007.safetensors",
93
+ "model.layers.14.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
94
+ "model.layers.14.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
95
+ "model.layers.14.self_attn.q_norm.weight": "model-00004-of-00007.safetensors",
96
+ "model.layers.14.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
97
+ "model.layers.14.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
98
+ "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors",
99
+ "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
100
+ "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
101
+ "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
102
+ "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
103
+ "model.layers.15.post_feedforward_layernorm.weight": "model-00004-of-00007.safetensors",
104
+ "model.layers.15.pre_feedforward_layernorm.weight": "model-00004-of-00007.safetensors",
105
+ "model.layers.15.self_attn.k_norm.weight": "model-00004-of-00007.safetensors",
106
+ "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
107
+ "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
108
+ "model.layers.15.self_attn.q_norm.weight": "model-00004-of-00007.safetensors",
109
+ "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
110
+ "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
111
+ "model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors",
112
+ "model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
113
+ "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
114
+ "model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
115
+ "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
116
+ "model.layers.16.post_feedforward_layernorm.weight": "model-00004-of-00007.safetensors",
117
+ "model.layers.16.pre_feedforward_layernorm.weight": "model-00004-of-00007.safetensors",
118
+ "model.layers.16.self_attn.k_norm.weight": "model-00004-of-00007.safetensors",
119
+ "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
120
+ "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
121
+ "model.layers.16.self_attn.q_norm.weight": "model-00004-of-00007.safetensors",
122
+ "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
123
+ "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
124
+ "model.layers.17.input_layernorm.weight": "model-00005-of-00007.safetensors",
125
+ "model.layers.17.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
126
+ "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
127
+ "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
128
+ "model.layers.17.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
129
+ "model.layers.17.post_feedforward_layernorm.weight": "model-00005-of-00007.safetensors",
130
+ "model.layers.17.pre_feedforward_layernorm.weight": "model-00005-of-00007.safetensors",
131
+ "model.layers.17.self_attn.k_norm.weight": "model-00004-of-00007.safetensors",
132
+ "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
133
+ "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
134
+ "model.layers.17.self_attn.q_norm.weight": "model-00004-of-00007.safetensors",
135
+ "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
136
+ "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
137
+ "model.layers.18.input_layernorm.weight": "model-00005-of-00007.safetensors",
138
+ "model.layers.18.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
139
+ "model.layers.18.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
140
+ "model.layers.18.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
141
+ "model.layers.18.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
142
+ "model.layers.18.post_feedforward_layernorm.weight": "model-00005-of-00007.safetensors",
143
+ "model.layers.18.pre_feedforward_layernorm.weight": "model-00005-of-00007.safetensors",
144
+ "model.layers.18.self_attn.k_norm.weight": "model-00005-of-00007.safetensors",
145
+ "model.layers.18.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
146
+ "model.layers.18.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
147
+ "model.layers.18.self_attn.q_norm.weight": "model-00005-of-00007.safetensors",
148
+ "model.layers.18.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
149
+ "model.layers.18.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
150
+ "model.layers.19.input_layernorm.weight": "model-00005-of-00007.safetensors",
151
+ "model.layers.19.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
152
+ "model.layers.19.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
153
+ "model.layers.19.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
154
+ "model.layers.19.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
155
+ "model.layers.19.post_feedforward_layernorm.weight": "model-00005-of-00007.safetensors",
156
+ "model.layers.19.pre_feedforward_layernorm.weight": "model-00005-of-00007.safetensors",
157
+ "model.layers.19.self_attn.k_norm.weight": "model-00005-of-00007.safetensors",
158
+ "model.layers.19.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
159
+ "model.layers.19.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
160
+ "model.layers.19.self_attn.q_norm.weight": "model-00005-of-00007.safetensors",
161
+ "model.layers.19.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
162
+ "model.layers.19.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
163
+ "model.layers.2.input_layernorm.weight": "model-00002-of-00007.safetensors",
164
+ "model.layers.2.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
165
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
166
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
167
+ "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
168
+ "model.layers.2.post_feedforward_layernorm.weight": "model-00002-of-00007.safetensors",
169
+ "model.layers.2.pre_feedforward_layernorm.weight": "model-00002-of-00007.safetensors",
170
+ "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00007.safetensors",
171
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
172
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
173
+ "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00007.safetensors",
174
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
175
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
176
+ "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors",
177
+ "model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
178
+ "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
179
+ "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
180
+ "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
181
+ "model.layers.20.post_feedforward_layernorm.weight": "model-00005-of-00007.safetensors",
182
+ "model.layers.20.pre_feedforward_layernorm.weight": "model-00005-of-00007.safetensors",
183
+ "model.layers.20.self_attn.k_norm.weight": "model-00005-of-00007.safetensors",
184
+ "model.layers.20.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
185
+ "model.layers.20.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
186
+ "model.layers.20.self_attn.q_norm.weight": "model-00005-of-00007.safetensors",
187
+ "model.layers.20.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
188
+ "model.layers.20.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
189
+ "model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors",
190
+ "model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
191
+ "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
192
+ "model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
193
+ "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
194
+ "model.layers.21.post_feedforward_layernorm.weight": "model-00005-of-00007.safetensors",
195
+ "model.layers.21.pre_feedforward_layernorm.weight": "model-00005-of-00007.safetensors",
196
+ "model.layers.21.self_attn.k_norm.weight": "model-00005-of-00007.safetensors",
197
+ "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
198
+ "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
199
+ "model.layers.21.self_attn.q_norm.weight": "model-00005-of-00007.safetensors",
200
+ "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
201
+ "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
202
+ "model.layers.22.input_layernorm.weight": "model-00006-of-00007.safetensors",
203
+ "model.layers.22.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
204
+ "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
205
+ "model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
206
+ "model.layers.22.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
207
+ "model.layers.22.post_feedforward_layernorm.weight": "model-00006-of-00007.safetensors",
208
+ "model.layers.22.pre_feedforward_layernorm.weight": "model-00006-of-00007.safetensors",
209
+ "model.layers.22.self_attn.k_norm.weight": "model-00005-of-00007.safetensors",
210
+ "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
211
+ "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
212
+ "model.layers.22.self_attn.q_norm.weight": "model-00005-of-00007.safetensors",
213
+ "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
214
+ "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
215
+ "model.layers.23.input_layernorm.weight": "model-00006-of-00007.safetensors",
216
+ "model.layers.23.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
217
+ "model.layers.23.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
218
+ "model.layers.23.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
219
+ "model.layers.23.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
220
+ "model.layers.23.post_feedforward_layernorm.weight": "model-00006-of-00007.safetensors",
221
+ "model.layers.23.pre_feedforward_layernorm.weight": "model-00006-of-00007.safetensors",
222
+ "model.layers.23.self_attn.k_norm.weight": "model-00006-of-00007.safetensors",
223
+ "model.layers.23.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
224
+ "model.layers.23.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
225
+ "model.layers.23.self_attn.q_norm.weight": "model-00006-of-00007.safetensors",
226
+ "model.layers.23.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
227
+ "model.layers.23.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
228
+ "model.layers.24.input_layernorm.weight": "model-00006-of-00007.safetensors",
229
+ "model.layers.24.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
230
+ "model.layers.24.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
231
+ "model.layers.24.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
232
+ "model.layers.24.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
233
+ "model.layers.24.post_feedforward_layernorm.weight": "model-00006-of-00007.safetensors",
234
+ "model.layers.24.pre_feedforward_layernorm.weight": "model-00006-of-00007.safetensors",
235
+ "model.layers.24.self_attn.k_norm.weight": "model-00006-of-00007.safetensors",
236
+ "model.layers.24.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
237
+ "model.layers.24.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
238
+ "model.layers.24.self_attn.q_norm.weight": "model-00006-of-00007.safetensors",
239
+ "model.layers.24.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
240
+ "model.layers.24.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
241
+ "model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors",
242
+ "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
243
+ "model.layers.25.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
244
+ "model.layers.25.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
245
+ "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
246
+ "model.layers.25.post_feedforward_layernorm.weight": "model-00006-of-00007.safetensors",
247
+ "model.layers.25.pre_feedforward_layernorm.weight": "model-00006-of-00007.safetensors",
248
+ "model.layers.25.self_attn.k_norm.weight": "model-00006-of-00007.safetensors",
249
+ "model.layers.25.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
250
+ "model.layers.25.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
251
+ "model.layers.25.self_attn.q_norm.weight": "model-00006-of-00007.safetensors",
252
+ "model.layers.25.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
253
+ "model.layers.25.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
254
+ "model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors",
255
+ "model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
256
+ "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
257
+ "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
258
+ "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
259
+ "model.layers.26.post_feedforward_layernorm.weight": "model-00006-of-00007.safetensors",
260
+ "model.layers.26.pre_feedforward_layernorm.weight": "model-00006-of-00007.safetensors",
261
+ "model.layers.26.self_attn.k_norm.weight": "model-00006-of-00007.safetensors",
262
+ "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
263
+ "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
264
+ "model.layers.26.self_attn.q_norm.weight": "model-00006-of-00007.safetensors",
265
+ "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
266
+ "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
267
+ "model.layers.27.input_layernorm.weight": "model-00007-of-00007.safetensors",
268
+ "model.layers.27.mlp.down_proj.weight": "model-00007-of-00007.safetensors",
269
+ "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
270
+ "model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
271
+ "model.layers.27.post_attention_layernorm.weight": "model-00007-of-00007.safetensors",
272
+ "model.layers.27.post_feedforward_layernorm.weight": "model-00007-of-00007.safetensors",
273
+ "model.layers.27.pre_feedforward_layernorm.weight": "model-00007-of-00007.safetensors",
274
+ "model.layers.27.self_attn.k_norm.weight": "model-00006-of-00007.safetensors",
275
+ "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
276
+ "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
277
+ "model.layers.27.self_attn.q_norm.weight": "model-00006-of-00007.safetensors",
278
+ "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
279
+ "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
280
+ "model.layers.28.input_layernorm.weight": "model-00007-of-00007.safetensors",
281
+ "model.layers.28.mlp.down_proj.weight": "model-00007-of-00007.safetensors",
282
+ "model.layers.28.mlp.gate_proj.weight": "model-00007-of-00007.safetensors",
283
+ "model.layers.28.mlp.up_proj.weight": "model-00007-of-00007.safetensors",
284
+ "model.layers.28.post_attention_layernorm.weight": "model-00007-of-00007.safetensors",
285
+ "model.layers.28.post_feedforward_layernorm.weight": "model-00007-of-00007.safetensors",
286
+ "model.layers.28.pre_feedforward_layernorm.weight": "model-00007-of-00007.safetensors",
287
+ "model.layers.28.self_attn.k_norm.weight": "model-00007-of-00007.safetensors",
288
+ "model.layers.28.self_attn.k_proj.weight": "model-00007-of-00007.safetensors",
289
+ "model.layers.28.self_attn.o_proj.weight": "model-00007-of-00007.safetensors",
290
+ "model.layers.28.self_attn.q_norm.weight": "model-00007-of-00007.safetensors",
291
+ "model.layers.28.self_attn.q_proj.weight": "model-00007-of-00007.safetensors",
292
+ "model.layers.28.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
293
+ "model.layers.29.input_layernorm.weight": "model-00007-of-00007.safetensors",
294
+ "model.layers.29.mlp.down_proj.weight": "model-00007-of-00007.safetensors",
295
+ "model.layers.29.mlp.gate_proj.weight": "model-00007-of-00007.safetensors",
296
+ "model.layers.29.mlp.up_proj.weight": "model-00007-of-00007.safetensors",
297
+ "model.layers.29.post_attention_layernorm.weight": "model-00007-of-00007.safetensors",
298
+ "model.layers.29.post_feedforward_layernorm.weight": "model-00007-of-00007.safetensors",
299
+ "model.layers.29.pre_feedforward_layernorm.weight": "model-00007-of-00007.safetensors",
300
+ "model.layers.29.self_attn.k_norm.weight": "model-00007-of-00007.safetensors",
301
+ "model.layers.29.self_attn.k_proj.weight": "model-00007-of-00007.safetensors",
302
+ "model.layers.29.self_attn.o_proj.weight": "model-00007-of-00007.safetensors",
303
+ "model.layers.29.self_attn.q_norm.weight": "model-00007-of-00007.safetensors",
304
+ "model.layers.29.self_attn.q_proj.weight": "model-00007-of-00007.safetensors",
305
+ "model.layers.29.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
306
+ "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors",
307
+ "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
308
+ "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
309
+ "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
310
+ "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
311
+ "model.layers.3.post_feedforward_layernorm.weight": "model-00002-of-00007.safetensors",
312
+ "model.layers.3.pre_feedforward_layernorm.weight": "model-00002-of-00007.safetensors",
313
+ "model.layers.3.self_attn.k_norm.weight": "model-00002-of-00007.safetensors",
314
+ "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
315
+ "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
316
+ "model.layers.3.self_attn.q_norm.weight": "model-00002-of-00007.safetensors",
317
+ "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
318
+ "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
319
+ "model.layers.30.input_layernorm.weight": "model-00007-of-00007.safetensors",
320
+ "model.layers.30.mlp.down_proj.weight": "model-00007-of-00007.safetensors",
321
+ "model.layers.30.mlp.gate_proj.weight": "model-00007-of-00007.safetensors",
322
+ "model.layers.30.mlp.up_proj.weight": "model-00007-of-00007.safetensors",
323
+ "model.layers.30.post_attention_layernorm.weight": "model-00007-of-00007.safetensors",
324
+ "model.layers.30.post_feedforward_layernorm.weight": "model-00007-of-00007.safetensors",
325
+ "model.layers.30.pre_feedforward_layernorm.weight": "model-00007-of-00007.safetensors",
326
+ "model.layers.30.self_attn.k_norm.weight": "model-00007-of-00007.safetensors",
327
+ "model.layers.30.self_attn.k_proj.weight": "model-00007-of-00007.safetensors",
328
+ "model.layers.30.self_attn.o_proj.weight": "model-00007-of-00007.safetensors",
329
+ "model.layers.30.self_attn.q_norm.weight": "model-00007-of-00007.safetensors",
330
+ "model.layers.30.self_attn.q_proj.weight": "model-00007-of-00007.safetensors",
331
+ "model.layers.30.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
332
+ "model.layers.31.input_layernorm.weight": "model-00007-of-00007.safetensors",
333
+ "model.layers.31.mlp.down_proj.weight": "model-00007-of-00007.safetensors",
334
+ "model.layers.31.mlp.gate_proj.weight": "model-00007-of-00007.safetensors",
335
+ "model.layers.31.mlp.up_proj.weight": "model-00007-of-00007.safetensors",
336
+ "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00007.safetensors",
337
+ "model.layers.31.post_feedforward_layernorm.weight": "model-00007-of-00007.safetensors",
338
+ "model.layers.31.pre_feedforward_layernorm.weight": "model-00007-of-00007.safetensors",
339
+ "model.layers.31.self_attn.k_norm.weight": "model-00007-of-00007.safetensors",
340
+ "model.layers.31.self_attn.k_proj.weight": "model-00007-of-00007.safetensors",
341
+ "model.layers.31.self_attn.o_proj.weight": "model-00007-of-00007.safetensors",
342
+ "model.layers.31.self_attn.q_norm.weight": "model-00007-of-00007.safetensors",
343
+ "model.layers.31.self_attn.q_proj.weight": "model-00007-of-00007.safetensors",
344
+ "model.layers.31.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
345
+ "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors",
346
+ "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
347
+ "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
348
+ "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
349
+ "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
350
+ "model.layers.4.post_feedforward_layernorm.weight": "model-00002-of-00007.safetensors",
351
+ "model.layers.4.pre_feedforward_layernorm.weight": "model-00002-of-00007.safetensors",
352
+ "model.layers.4.self_attn.k_norm.weight": "model-00002-of-00007.safetensors",
353
+ "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
354
+ "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
355
+ "model.layers.4.self_attn.q_norm.weight": "model-00002-of-00007.safetensors",
356
+ "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
357
+ "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
358
+ "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors",
359
+ "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
360
+ "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
361
+ "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
362
+ "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
363
+ "model.layers.5.post_feedforward_layernorm.weight": "model-00002-of-00007.safetensors",
364
+ "model.layers.5.pre_feedforward_layernorm.weight": "model-00002-of-00007.safetensors",
365
+ "model.layers.5.self_attn.k_norm.weight": "model-00002-of-00007.safetensors",
366
+ "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
367
+ "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
368
+ "model.layers.5.self_attn.q_norm.weight": "model-00002-of-00007.safetensors",
369
+ "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
370
+ "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
371
+ "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors",
372
+ "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
373
+ "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
374
+ "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
375
+ "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
376
+ "model.layers.6.post_feedforward_layernorm.weight": "model-00002-of-00007.safetensors",
377
+ "model.layers.6.pre_feedforward_layernorm.weight": "model-00002-of-00007.safetensors",
378
+ "model.layers.6.self_attn.k_norm.weight": "model-00002-of-00007.safetensors",
379
+ "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
380
+ "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
381
+ "model.layers.6.self_attn.q_norm.weight": "model-00002-of-00007.safetensors",
382
+ "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
383
+ "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
384
+ "model.layers.7.input_layernorm.weight": "model-00003-of-00007.safetensors",
385
+ "model.layers.7.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
386
+ "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
387
+ "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
388
+ "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
389
+ "model.layers.7.post_feedforward_layernorm.weight": "model-00003-of-00007.safetensors",
390
+ "model.layers.7.pre_feedforward_layernorm.weight": "model-00003-of-00007.safetensors",
391
+ "model.layers.7.self_attn.k_norm.weight": "model-00002-of-00007.safetensors",
392
+ "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
393
+ "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
394
+ "model.layers.7.self_attn.q_norm.weight": "model-00002-of-00007.safetensors",
395
+ "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
396
+ "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
397
+ "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors",
398
+ "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
399
+ "model.layers.8.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
400
+ "model.layers.8.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
401
+ "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
402
+ "model.layers.8.post_feedforward_layernorm.weight": "model-00003-of-00007.safetensors",
403
+ "model.layers.8.pre_feedforward_layernorm.weight": "model-00003-of-00007.safetensors",
404
+ "model.layers.8.self_attn.k_norm.weight": "model-00003-of-00007.safetensors",
405
+ "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
406
+ "model.layers.8.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
407
+ "model.layers.8.self_attn.q_norm.weight": "model-00003-of-00007.safetensors",
408
+ "model.layers.8.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
409
+ "model.layers.8.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
410
+ "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors",
411
+ "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
412
+ "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
413
+ "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
414
+ "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
415
+ "model.layers.9.post_feedforward_layernorm.weight": "model-00003-of-00007.safetensors",
416
+ "model.layers.9.pre_feedforward_layernorm.weight": "model-00003-of-00007.safetensors",
417
+ "model.layers.9.self_attn.k_norm.weight": "model-00003-of-00007.safetensors",
418
+ "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
419
+ "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
420
+ "model.layers.9.self_attn.q_norm.weight": "model-00003-of-00007.safetensors",
421
+ "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
422
+ "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
423
+ "model.norm.weight": "model-00007-of-00007.safetensors"
424
+ }
425
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end_of_text|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ }
16
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:050a14faefc0c4f12075579c1676a9a6ca207801b4218c64f69cd7553bdda1b8
3
+ size 17209796
tokenizer_config.json ADDED
@@ -0,0 +1,2063 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|pre_fim|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|suf_fim|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|mid_fim|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|eoc_fim|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|exec_start|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|exec_end|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|exec_error|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<tool_call>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "</tool_call>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|file_name_start|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|file_name_end|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_11|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_12|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_13|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_14|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_15|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_16|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_17|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_18|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_19|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_20|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_21|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_22|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_23|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_24|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_25|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_26|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_27|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_28|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_29|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_30|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_31|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_32|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_33|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_34|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_35|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_36|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_37|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_38|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_39|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_40|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_41|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_42|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_43|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_44|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_45|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_46|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_47|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_48|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_49|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_50|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_51|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_52|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_53|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_54|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_55|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_56|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_57|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_58|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_59|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_60|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_61|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_62|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_63|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_64|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_65|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_66|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_67|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_68|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_69|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_70|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_71|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_72|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_73|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_74|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_75|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_76|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_77|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_78|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_79|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_80|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_81|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_82|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_83|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_84|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_85|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_86|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_87|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_88|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_89|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_90|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_91|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_92|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_93|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_94|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_95|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_96|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_97|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_98|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_99|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_100|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_101|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_102|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_103|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_104|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_105|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_106|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_107|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_108|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_109|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_110|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_111|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_112|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_113|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_114|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_115|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_116|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_117|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_118|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_119|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_120|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_121|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_122|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_123|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_124|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_125|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_126|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_127|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_128|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_129|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_130|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_131|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_132|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_133|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_134|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_135|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_136|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_137|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_138|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_139|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_140|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_141|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_142|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_143|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_144|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_145|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_146|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_147|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_148|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_149|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_150|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_151|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_152|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_153|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_154|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_155|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_156|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_157|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_158|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_159|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_160|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_161|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_162|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_163|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_164|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_165|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_166|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_167|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_168|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_169|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_170|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_171|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_172|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_173|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_174|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_175|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_176|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_177|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_178|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_179|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_180|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_181|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_182|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_183|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_184|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_185|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_186|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_187|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_188|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_189|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_190|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_191|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_192|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_193|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_194|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_195|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_196|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_197|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_198|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_199|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_200|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_201|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_202|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_203|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_204|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_205|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_206|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_207|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_208|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_209|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_210|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_211|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_212|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_213|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_214|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_215|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_216|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_217|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_218|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_219|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_220|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_221|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_222|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_223|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_224|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_225|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_226|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_227|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_228|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_229|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_230|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_231|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_232|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_233|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_234|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_235|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_236|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_237|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_238|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_239|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_240|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_241|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_242|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_243|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_244|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_245|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_246|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_247|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_248|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_249|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_250|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) -%}\n{%- set emit = namespace(started=false) -%}\n\n{# ---------- Build base system message (always emitted) ---------- #}\n{%- set base_system = 'You are rnj-1, a foundation model trained by Essential AI.\\n' -%}\n\n{# ---------- Optional tools preface as a synthetic system message ---------- #}\n{%- if tools %}\n {%- set sys_preamble -%}\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{%- for tool in tools %}\n{{ \"\\n\" ~ (tool | tojson) }}\n{% endfor %}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>\n {%- endset -%}\n\n {# If the first user-provided message is system, include it above the tools preface #}\n {%- set combined_system = (messages and messages[0].role == 'system')\n and (messages[0].content is string) -%}\n {%- set sys_content = (combined_system and (messages[0].content ~ \"\\n\\n\" ~ sys_preamble)) or sys_preamble -%}\n\n {%- set content = '<|start_header_id|>system<|end_header_id|>\\n' ~ base_system ~ '\\n' ~ sys_content ~ '<|eot_id|>' -%}\n {%- if not emit.started -%}{%- set content = bos_token ~ content -%}{%- set emit.started = true -%}{%- endif -%}\n {{- content -}}\n{%- else %}\n {# No tools: always emit base_system, and include user's system message if present #}\n {%- set user_system_content = '' -%}\n {%- if messages and messages[0].role == 'system' and (messages[0].content is string) -%}\n {%- set user_system_content = '\\n' ~ messages[0].content -%}\n {%- endif -%}\n {%- set content = '<|start_header_id|>system<|end_header_id|>\\n' ~ base_system ~ user_system_content ~ '<|eot_id|>' -%}\n {%- if not emit.started -%}{%- set content = bos_token ~ 
content -%}{%- set emit.started = true -%}{%- endif -%}\n {{- content -}}\n{%- endif -%}\n\n{# ---------- Locate last user query for multi-step tool behavior ---------- #}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 -%}\n {%- if ns.multi_step_tool\n and message.role == \"user\"\n and message.content is string\n and not (message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) -%}\n {%- set ns.multi_step_tool = false -%}\n {%- set ns.last_query_index = index -%}\n {%- endif -%}\n{%- endfor -%}\n\n{# ---------- Walk all messages and emit in Llama-3 format ---------- #}\n{%- for message in messages %}\n {# normalize content #}\n {%- if message.content is string -%}\n {%- set content = message.content -%}\n {%- else -%}\n {%- set content = '' -%}\n {%- endif -%}\n\n {# --- user/system (non-initial system already handled above) --- #}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) -%}\n {%- set block = '<|start_header_id|>' ~ message.role ~ '<|end_header_id|>\\n' ~ content ~ '<|eot_id|>' -%}\n {%- if not emit.started -%}{%- set block = bos_token ~ block -%}{%- set emit.started = true -%}{%- endif -%}\n {{- block -}}\n\n {# --- assistant --- #}\n {%- elif message.role == \"assistant\" -%}\n \n {%- set body = content -%}\n {%- set header = '<|start_header_id|>assistant<|end_header_id|>\\n' -%}\n {%- if not emit.started -%}{{ bos_token }}{%- set emit.started = true -%}{%- endif -%}\n {{- header -}}\n {% generation %}\n {{- body -}}\n {%- if message.tool_calls -%}\n {%- for tool_call in message.tool_calls -%}\n {%- if tool_call.function -%}{%- set tc = tool_call.function -%}{%- else -%}{%- set tc = tool_call -%}{%- endif -%}\n {%- set args_json = (tc.arguments if (tc.arguments is string) else (tc.arguments | tojson)) -%}\n {%- if loop.first -%}\n {{- '<tool_call>\\n{\"name\": \"' ~ tc.name ~ '\", \"arguments\": ' ~ args_json ~ '}\\n</tool_call>' 
-}}\n {%- else -%}\n {{- '\\n<tool_call>\\n{\"name\": \"' ~ tc.name ~ '\", \"arguments\": ' ~ args_json ~ '}\\n</tool_call>' -}}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n {{- '<|eot_id|>' -}}{%- endgeneration -%}\n {# --- tool messages are wrapped as synthetic user messages with <tool_response> --- #}\n {%- elif message.role == \"tool\" -%}\n {%- set open_user = (loop.first or (loop.index0 > 0 and messages[loop.index0 - 1].role != \"tool\")) -%}\n {%- set close_user = (loop.last or (loop.index0 < messages|length - 1 and messages[loop.index0 + 1].role != \"tool\")) -%}\n\n {%- if open_user -%}\n {%- set header = '<|start_header_id|>user<|end_header_id|>\\n' -%}\n {%- if not emit.started -%}{%- set header = bos_token ~ header -%}{%- set emit.started = true -%}{%- endif -%}\n {{- header -}}\n {%- endif -%}\n {%- if open_user -%}\n {{- '<tool_response>\\n' -}}\n {%- else -%}\n {{- '\\n<tool_response>\\n' -}}\n {%- endif -%}\n {{- content -}}\n {{- '\\n</tool_response>' -}}\n\n {%- if close_user -%}\n {{- '<|eot_id|>' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n\n{# ---------- Add generation prompt header for the model to continue ---------- #}\n{%- if add_generation_prompt -%}\n {%- set tail = '<|start_header_id|>assistant<|end_header_id|>\\n' -%}\n {{- tail -}}\n{%- endif -%}",
2054
+ "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|end_of_text|>",
2056
+ "extra_special_tokens": {},
2057
+ "model_input_names": [
2058
+ "input_ids",
2059
+ "attention_mask"
2060
+ ],
2061
+ "model_max_length": 1000000000000000019884624838656,
2062
+ "tokenizer_class": "PreTrainedTokenizer"
2063
+ }