Upload convert_to_ggml.ipynb
Browse files — adding conversion script convert_to_ggml.ipynb
- convert_to_ggml.ipynb +273 -0
convert_to_ggml.ipynb
ADDED
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"nbformat": 4,
|
3 |
+
"nbformat_minor": 0,
|
4 |
+
"metadata": {
|
5 |
+
"colab": {
|
6 |
+
"provenance": []
|
7 |
+
},
|
8 |
+
"kernelspec": {
|
9 |
+
"name": "python3",
|
10 |
+
"display_name": "Python 3"
|
11 |
+
},
|
12 |
+
"language_info": {
|
13 |
+
"name": "python"
|
14 |
+
}
|
15 |
+
},
|
16 |
+
"cells": [
|
17 |
+
{
|
18 |
+
"cell_type": "markdown",
|
19 |
+
"source": [
|
20 |
+
"## Converting a Whisper safetensors checkpoint to ggml via an intermediate HDF5 (.h5) file"
|
21 |
+
],
|
22 |
+
"metadata": {
|
23 |
+
"id": "Kbq33zFd4QXE"
|
24 |
+
}
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"cell_type": "code",
|
28 |
+
"source": [
|
29 |
+
"!pip install torch safetensors h5py -q"
|
30 |
+
],
|
31 |
+
"metadata": {
|
32 |
+
"colab": {
|
33 |
+
"base_uri": "https://localhost:8080/"
|
34 |
+
},
|
35 |
+
"id": "M43EPtRB33cE",
|
36 |
+
"outputId": "5081aebc-8864-48f4-c869-683e9511f082"
|
37 |
+
},
|
38 |
+
"execution_count": null,
|
39 |
+
"outputs": [
|
40 |
+
{
|
41 |
+
"output_type": "stream",
|
42 |
+
"name": "stdout",
|
43 |
+
"text": [
|
44 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
45 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m60.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
46 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m28.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
47 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m32.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
48 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
49 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
50 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
51 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
52 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
53 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m76.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
54 |
+
"\u001b[?25h"
|
55 |
+
]
|
56 |
+
}
|
57 |
+
]
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"cell_type": "code",
|
61 |
+
"execution_count": null,
|
62 |
+
"metadata": {
|
63 |
+
"colab": {
|
64 |
+
"base_uri": "https://localhost:8080/"
|
65 |
+
},
|
66 |
+
"id": "e2sGa27b3pvJ",
|
67 |
+
"outputId": "5660cafb-830e-409c-9921-feb584e4481c"
|
68 |
+
},
|
69 |
+
"outputs": [
|
70 |
+
{
|
71 |
+
"output_type": "stream",
|
72 |
+
"name": "stdout",
|
73 |
+
"text": [
|
74 |
+
"Cloning into 'whisper'...\n",
|
75 |
+
"remote: Enumerating objects: 828, done.\u001b[K\n",
|
76 |
+
"remote: Counting objects: 100% (370/370), done.\u001b[K\n",
|
77 |
+
"remote: Compressing objects: 100% (69/69), done.\u001b[K\n",
|
78 |
+
"remote: Total 828 (delta 333), reused 301 (delta 301), pack-reused 458 (from 2)\u001b[K\n",
|
79 |
+
"Receiving objects: 100% (828/828), 8.26 MiB | 10.11 MiB/s, done.\n",
|
80 |
+
"Resolving deltas: 100% (496/496), done.\n"
|
81 |
+
]
|
82 |
+
}
|
83 |
+
],
|
84 |
+
"source": [
|
85 |
+
"!git clone https://github.com/openai/whisper"
|
86 |
+
]
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"cell_type": "code",
|
90 |
+
"source": [
|
91 |
+
"!git clone https://github.com/ggerganov/whisper.cpp\n",
|
92 |
+
"!cd whisper.cpp && make"
|
93 |
+
],
|
94 |
+
"metadata": {
|
95 |
+
"id": "bnJW45ez3sx0"
|
96 |
+
},
|
97 |
+
"execution_count": null,
|
98 |
+
"outputs": []
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"cell_type": "code",
|
102 |
+
"source": [
|
103 |
+
"!git lfs install\n",
|
104 |
+
"!git clone https://huggingface.co/sadeghk/whisper-base"
|
105 |
+
],
|
106 |
+
"metadata": {
|
107 |
+
"colab": {
|
108 |
+
"base_uri": "https://localhost:8080/"
|
109 |
+
},
|
110 |
+
"id": "K7wQ0-d53sve",
|
111 |
+
"outputId": "d1736412-81c1-4a64-c3e0-4d46093d3dcd"
|
112 |
+
},
|
113 |
+
"execution_count": null,
|
114 |
+
"outputs": [
|
115 |
+
{
|
116 |
+
"output_type": "stream",
|
117 |
+
"name": "stdout",
|
118 |
+
"text": [
|
119 |
+
"Git LFS initialized.\n",
|
120 |
+
"Cloning into 'whisper-base'...\n",
|
121 |
+
"remote: Enumerating objects: 571, done.\u001b[K\n",
|
122 |
+
"remote: Counting objects: 100% (568/568), done.\u001b[K\n",
|
123 |
+
"remote: Compressing objects: 100% (568/568), done.\u001b[K\n",
|
124 |
+
"remote: Total 571 (delta 167), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
|
125 |
+
"Receiving objects: 100% (571/571), 692.35 KiB | 3.48 MiB/s, done.\n",
|
126 |
+
"Resolving deltas: 100% (167/167), done.\n"
|
127 |
+
]
|
128 |
+
}
|
129 |
+
]
|
130 |
+
},
|
131 |
+
{
|
132 |
+
"cell_type": "code",
|
133 |
+
"source": [
|
134 |
+
"import torch\n",
|
135 |
+
"import h5py\n",
|
136 |
+
"from safetensors.torch import load_file\n",
|
137 |
+
"\n",
|
138 |
+
"# Load the safetensors file\n",
|
139 |
+
"safetensors_path = \"whisper-base/model.safetensors\" # Replace with your file path\n",
|
140 |
+
"state_dict = load_file(safetensors_path)\n",
|
141 |
+
"\n",
|
142 |
+
"# Create an HDF5 file and store the tensors\n",
|
143 |
+
"h5_path = \"whisper-base/model.h5\"\n",
|
144 |
+
"with h5py.File(h5_path, \"w\") as h5f:\n",
|
145 |
+
"    for key, tensor in state_dict.items():\n",
|
146 |
+
"        h5f.create_dataset(key, data=tensor.numpy()) # Convert tensor to numpy before saving\n",
|
147 |
+
"\n",
|
148 |
+
"print(f\"Conversion complete: {h5_path} saved successfully!\")\n"
|
149 |
+
],
|
150 |
+
"metadata": {
|
151 |
+
"colab": {
|
152 |
+
"base_uri": "https://localhost:8080/"
|
153 |
+
},
|
154 |
+
"id": "b931-wDb36Bf",
|
155 |
+
"outputId": "59337d93-e872-4ed4-dc9b-7b2c29b53c89"
|
156 |
+
},
|
157 |
+
"execution_count": null,
|
158 |
+
"outputs": [
|
159 |
+
{
|
160 |
+
"output_type": "stream",
|
161 |
+
"name": "stdout",
|
162 |
+
"text": [
|
163 |
+
"Conversion complete: whisper-base/model.h5 saved successfully!\n"
|
164 |
+
]
|
165 |
+
}
|
166 |
+
]
|
167 |
+
},
|
168 |
+
{
|
169 |
+
"cell_type": "code",
|
170 |
+
"source": [
|
171 |
+
"# If the conversion fails with an error about max_length, set max_length = 448 in ./whisper-base/config.json and re-run this cell.\n",
|
172 |
+
"\n",
|
173 |
+
"!python3 ./whisper.cpp/models/convert-h5-to-ggml.py ./whisper-base/ ./whisper ."
|
174 |
+
],
|
175 |
+
"metadata": {
|
176 |
+
"id": "oNohyE-B3ss5"
|
177 |
+
},
|
178 |
+
"execution_count": null,
|
179 |
+
"outputs": []
|
180 |
+
},
|
181 |
+
{
|
182 |
+
"cell_type": "code",
|
183 |
+
"source": [
|
184 |
+
"!./whisper.cpp/build/bin/whisper-cli -m ggml-model.bin -l fa -f cab991ea4681b712417e0d7569c94dccc48f134ed1e6353f8ee69a85.wav"
|
185 |
+
],
|
186 |
+
"metadata": {
|
187 |
+
"colab": {
|
188 |
+
"base_uri": "https://localhost:8080/"
|
189 |
+
},
|
190 |
+
"id": "yj_lHrgE3sqW",
|
191 |
+
"outputId": "b7435036-0a31-4ca5-927d-9952c72f9bcf"
|
192 |
+
},
|
193 |
+
"execution_count": null,
|
194 |
+
"outputs": [
|
195 |
+
{
|
196 |
+
"output_type": "stream",
|
197 |
+
"name": "stdout",
|
198 |
+
"text": [
|
199 |
+
"whisper_init_from_file_with_params_no_state: loading model from 'ggml-model.bin'\n",
|
200 |
+
"whisper_init_with_params_no_state: use gpu = 1\n",
|
201 |
+
"whisper_init_with_params_no_state: flash attn = 0\n",
|
202 |
+
"whisper_init_with_params_no_state: gpu_device = 0\n",
|
203 |
+
"whisper_init_with_params_no_state: dtw = 0\n",
|
204 |
+
"whisper_init_with_params_no_state: devices = 1\n",
|
205 |
+
"whisper_init_with_params_no_state: backends = 1\n",
|
206 |
+
"whisper_model_load: loading model\n",
|
207 |
+
"whisper_model_load: n_vocab = 51865\n",
|
208 |
+
"whisper_model_load: n_audio_ctx = 1500\n",
|
209 |
+
"whisper_model_load: n_audio_state = 512\n",
|
210 |
+
"whisper_model_load: n_audio_head = 8\n",
|
211 |
+
"whisper_model_load: n_audio_layer = 6\n",
|
212 |
+
"whisper_model_load: n_text_ctx = 448\n",
|
213 |
+
"whisper_model_load: n_text_state = 512\n",
|
214 |
+
"whisper_model_load: n_text_head = 8\n",
|
215 |
+
"whisper_model_load: n_text_layer = 6\n",
|
216 |
+
"whisper_model_load: n_mels = 80\n",
|
217 |
+
"whisper_model_load: ftype = 1\n",
|
218 |
+
"whisper_model_load: qntvr = 0\n",
|
219 |
+
"whisper_model_load: type = 2 (base)\n",
|
220 |
+
"whisper_model_load: adding 1607 extra tokens\n",
|
221 |
+
"whisper_model_load: n_langs = 99\n",
|
222 |
+
"whisper_model_load: CPU total size = 147.37 MB\n",
|
223 |
+
"whisper_model_load: model size = 147.37 MB\n",
|
224 |
+
"whisper_backend_init_gpu: no GPU found\n",
|
225 |
+
"whisper_init_state: kv self size = 6.29 MB\n",
|
226 |
+
"whisper_init_state: kv cross size = 18.87 MB\n",
|
227 |
+
"whisper_init_state: kv pad size = 3.15 MB\n",
|
228 |
+
"whisper_init_state: compute buffer (conv) = 16.26 MB\n",
|
229 |
+
"whisper_init_state: compute buffer (encode) = 85.86 MB\n",
|
230 |
+
"whisper_init_state: compute buffer (cross) = 4.65 MB\n",
|
231 |
+
"whisper_init_state: compute buffer (decode) = 96.35 MB\n",
|
232 |
+
"\n",
|
233 |
+
"system_info: n_threads = 2 / 2 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 0 | \n",
|
234 |
+
"\n",
|
235 |
+
"main: processing 'cab991ea4681b712417e0d7569c94dccc48f134ed1e6353f8ee69a85.wav' (167711 samples, 10.5 sec), 2 threads, 1 processors, 5 beams + best of 5, lang = fa, task = transcribe, timestamps = 1 ...\n",
|
236 |
+
"\n",
|
237 |
+
"\n",
|
238 |
+
"[00:00:00.000 --> 00:00:30.000] لطه اصلی بارش باران جا به جایی هوای مرطوب به علت اختلاف دم آوروتو و تسه که به جبههای هواشناسی معروف است\n",
|
239 |
+
"\n",
|
240 |
+
"\n",
|
241 |
+
"whisper_print_timings: load time = 165.24 ms\n",
|
242 |
+
"whisper_print_timings: fallbacks = 0 p / 0 h\n",
|
243 |
+
"whisper_print_timings: mel time = 39.49 ms\n",
|
244 |
+
"whisper_print_timings: sample time = 722.76 ms / 315 runs ( 2.29 ms per run)\n",
|
245 |
+
"whisper_print_timings: encode time = 6999.46 ms / 1 runs ( 6999.46 ms per run)\n",
|
246 |
+
"whisper_print_timings: decode time = 0.00 ms / 1 runs ( 0.00 ms per run)\n",
|
247 |
+
"whisper_print_timings: batchd time = 4939.69 ms / 313 runs ( 15.78 ms per run)\n",
|
248 |
+
"whisper_print_timings: prompt time = 0.00 ms / 1 runs ( 0.00 ms per run)\n",
|
249 |
+
"whisper_print_timings: total time = 12942.70 ms\n"
|
250 |
+
]
|
251 |
+
}
|
252 |
+
]
|
253 |
+
},
|
254 |
+
{
|
255 |
+
"cell_type": "code",
|
256 |
+
"source": [],
|
257 |
+
"metadata": {
|
258 |
+
"id": "QN0WRNiY3snj"
|
259 |
+
},
|
260 |
+
"execution_count": null,
|
261 |
+
"outputs": []
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"cell_type": "code",
|
265 |
+
"source": [],
|
266 |
+
"metadata": {
|
267 |
+
"id": "eIUmVDHn3skk"
|
268 |
+
},
|
269 |
+
"execution_count": null,
|
270 |
+
"outputs": []
|
271 |
+
}
|
272 |
+
]
|
273 |
+
}
|