SadeghK committed on
Commit
c29f7e4
·
verified ·
1 Parent(s): 8ed7dc6

Upload convert_to_ggml.ipynb

Browse files

Adding conversion script convert_to_ggml.ipynb

Files changed (1) hide show
  1. convert_to_ggml.ipynb +273 -0
convert_to_ggml.ipynb ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "markdown",
19
+ "source": [
20
+ "## Converting to ggml using h5"
21
+ ],
22
+ "metadata": {
23
+ "id": "Kbq33zFd4QXE"
24
+ }
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "source": [
29
+ "!pip install torch safetensors h5py -q"
30
+ ],
31
+ "metadata": {
32
+ "colab": {
33
+ "base_uri": "https://localhost:8080/"
34
+ },
35
+ "id": "M43EPtRB33cE",
36
+ "outputId": "5081aebc-8864-48f4-c869-683e9511f082"
37
+ },
38
+ "execution_count": null,
39
+ "outputs": [
40
+ {
41
+ "output_type": "stream",
42
+ "name": "stdout",
43
+ "text": [
44
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
45
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m60.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
46
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m28.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
47
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m32.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
48
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
49
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
50
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
51
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
52
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
53
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m76.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
54
+ "\u001b[?25h"
55
+ ]
56
+ }
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": null,
62
+ "metadata": {
63
+ "colab": {
64
+ "base_uri": "https://localhost:8080/"
65
+ },
66
+ "id": "e2sGa27b3pvJ",
67
+ "outputId": "5660cafb-830e-409c-9921-feb584e4481c"
68
+ },
69
+ "outputs": [
70
+ {
71
+ "output_type": "stream",
72
+ "name": "stdout",
73
+ "text": [
74
+ "Cloning into 'whisper'...\n",
75
+ "remote: Enumerating objects: 828, done.\u001b[K\n",
76
+ "remote: Counting objects: 100% (370/370), done.\u001b[K\n",
77
+ "remote: Compressing objects: 100% (69/69), done.\u001b[K\n",
78
+ "remote: Total 828 (delta 333), reused 301 (delta 301), pack-reused 458 (from 2)\u001b[K\n",
79
+ "Receiving objects: 100% (828/828), 8.26 MiB | 10.11 MiB/s, done.\n",
80
+ "Resolving deltas: 100% (496/496), done.\n"
81
+ ]
82
+ }
83
+ ],
84
+ "source": [
85
+ "!git clone https://github.com/openai/whisper"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "source": [
91
+ "!git clone https://github.com/ggerganov/whisper.cpp\n",
92
+ "!cd whisper.cpp && make"
93
+ ],
94
+ "metadata": {
95
+ "id": "bnJW45ez3sx0"
96
+ },
97
+ "execution_count": null,
98
+ "outputs": []
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "source": [
103
+ "!git lfs install\n",
104
+ "!git clone https://huggingface.co/sadeghk/whisper-base"
105
+ ],
106
+ "metadata": {
107
+ "colab": {
108
+ "base_uri": "https://localhost:8080/"
109
+ },
110
+ "id": "K7wQ0-d53sve",
111
+ "outputId": "d1736412-81c1-4a64-c3e0-4d46093d3dcd"
112
+ },
113
+ "execution_count": null,
114
+ "outputs": [
115
+ {
116
+ "output_type": "stream",
117
+ "name": "stdout",
118
+ "text": [
119
+ "Git LFS initialized.\n",
120
+ "Cloning into 'whisper-base'...\n",
121
+ "remote: Enumerating objects: 571, done.\u001b[K\n",
122
+ "remote: Counting objects: 100% (568/568), done.\u001b[K\n",
123
+ "remote: Compressing objects: 100% (568/568), done.\u001b[K\n",
124
+ "remote: Total 571 (delta 167), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
125
+ "Receiving objects: 100% (571/571), 692.35 KiB | 3.48 MiB/s, done.\n",
126
+ "Resolving deltas: 100% (167/167), done.\n"
127
+ ]
128
+ }
129
+ ]
130
+ },
131
+ {
132
+ "cell_type": "code",
133
+ "source": [
134
+ "import torch\n",
135
+ "import h5py\n",
136
+ "from safetensors.torch import load_file\n",
137
+ "\n",
138
+ "# Load the safetensors file\n",
139
+ "safetensors_path = \"whisper-base/model.safetensors\" # Replace with your file path\n",
140
+ "state_dict = load_file(safetensors_path)\n",
141
+ "\n",
142
+ "# Create an HDF5 file and store the tensors\n",
143
+ "h5_path = \"whisper-base/model.h5\"\n",
144
+ "with h5py.File(h5_path, \"w\") as h5f:\n",
145
+ " for key, tensor in state_dict.items():\n",
146
+ " h5f.create_dataset(key, data=tensor.numpy()) # Convert tensor to numpy before saving\n",
147
+ "\n",
148
+ "print(f\"Conversion complete: {h5_path} saved successfully!\")\n"
149
+ ],
150
+ "metadata": {
151
+ "colab": {
152
+ "base_uri": "https://localhost:8080/"
153
+ },
154
+ "id": "b931-wDb36Bf",
155
+ "outputId": "59337d93-e872-4ed4-dc9b-7b2c29b53c89"
156
+ },
157
+ "execution_count": null,
158
+ "outputs": [
159
+ {
160
+ "output_type": "stream",
161
+ "name": "stdout",
162
+ "text": [
163
+ "Conversion complete: whisper-base/model.h5 saved successfully!\n"
164
+ ]
165
+ }
166
+ ]
167
+ },
168
+ {
169
+ "cell_type": "code",
170
+ "source": [
171
+ "# if there is an error about max_length, set max_length = 448 in ./whisper-base/config.json\n",
172
+ "\n",
173
+ "!python3 ./whisper.cpp/models/convert-h5-to-ggml.py ./whisper-base/ ./whisper ."
174
+ ],
175
+ "metadata": {
176
+ "id": "oNohyE-B3ss5"
177
+ },
178
+ "execution_count": null,
179
+ "outputs": []
180
+ },
181
+ {
182
+ "cell_type": "code",
183
+ "source": [
184
+ "!./whisper.cpp/build/bin/whisper-cli -m ggml-model.bin -l fa -f cab991ea4681b712417e0d7569c94dccc48f134ed1e6353f8ee69a85.wav"
185
+ ],
186
+ "metadata": {
187
+ "colab": {
188
+ "base_uri": "https://localhost:8080/"
189
+ },
190
+ "id": "yj_lHrgE3sqW",
191
+ "outputId": "b7435036-0a31-4ca5-927d-9952c72f9bcf"
192
+ },
193
+ "execution_count": null,
194
+ "outputs": [
195
+ {
196
+ "output_type": "stream",
197
+ "name": "stdout",
198
+ "text": [
199
+ "whisper_init_from_file_with_params_no_state: loading model from 'ggml-model.bin'\n",
200
+ "whisper_init_with_params_no_state: use gpu = 1\n",
201
+ "whisper_init_with_params_no_state: flash attn = 0\n",
202
+ "whisper_init_with_params_no_state: gpu_device = 0\n",
203
+ "whisper_init_with_params_no_state: dtw = 0\n",
204
+ "whisper_init_with_params_no_state: devices = 1\n",
205
+ "whisper_init_with_params_no_state: backends = 1\n",
206
+ "whisper_model_load: loading model\n",
207
+ "whisper_model_load: n_vocab = 51865\n",
208
+ "whisper_model_load: n_audio_ctx = 1500\n",
209
+ "whisper_model_load: n_audio_state = 512\n",
210
+ "whisper_model_load: n_audio_head = 8\n",
211
+ "whisper_model_load: n_audio_layer = 6\n",
212
+ "whisper_model_load: n_text_ctx = 448\n",
213
+ "whisper_model_load: n_text_state = 512\n",
214
+ "whisper_model_load: n_text_head = 8\n",
215
+ "whisper_model_load: n_text_layer = 6\n",
216
+ "whisper_model_load: n_mels = 80\n",
217
+ "whisper_model_load: ftype = 1\n",
218
+ "whisper_model_load: qntvr = 0\n",
219
+ "whisper_model_load: type = 2 (base)\n",
220
+ "whisper_model_load: adding 1607 extra tokens\n",
221
+ "whisper_model_load: n_langs = 99\n",
222
+ "whisper_model_load: CPU total size = 147.37 MB\n",
223
+ "whisper_model_load: model size = 147.37 MB\n",
224
+ "whisper_backend_init_gpu: no GPU found\n",
225
+ "whisper_init_state: kv self size = 6.29 MB\n",
226
+ "whisper_init_state: kv cross size = 18.87 MB\n",
227
+ "whisper_init_state: kv pad size = 3.15 MB\n",
228
+ "whisper_init_state: compute buffer (conv) = 16.26 MB\n",
229
+ "whisper_init_state: compute buffer (encode) = 85.86 MB\n",
230
+ "whisper_init_state: compute buffer (cross) = 4.65 MB\n",
231
+ "whisper_init_state: compute buffer (decode) = 96.35 MB\n",
232
+ "\n",
233
+ "system_info: n_threads = 2 / 2 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 0 | \n",
234
+ "\n",
235
+ "main: processing 'cab991ea4681b712417e0d7569c94dccc48f134ed1e6353f8ee69a85.wav' (167711 samples, 10.5 sec), 2 threads, 1 processors, 5 beams + best of 5, lang = fa, task = transcribe, timestamps = 1 ...\n",
236
+ "\n",
237
+ "\n",
238
+ "[00:00:00.000 --> 00:00:30.000] لطه اصلی بارش باران جا به جایی هوای مرطوب به علت اختلاف دم آوروتو و تسه که به جبه‌های هواشناسی معروف است\n",
239
+ "\n",
240
+ "\n",
241
+ "whisper_print_timings: load time = 165.24 ms\n",
242
+ "whisper_print_timings: fallbacks = 0 p / 0 h\n",
243
+ "whisper_print_timings: mel time = 39.49 ms\n",
244
+ "whisper_print_timings: sample time = 722.76 ms / 315 runs ( 2.29 ms per run)\n",
245
+ "whisper_print_timings: encode time = 6999.46 ms / 1 runs ( 6999.46 ms per run)\n",
246
+ "whisper_print_timings: decode time = 0.00 ms / 1 runs ( 0.00 ms per run)\n",
247
+ "whisper_print_timings: batchd time = 4939.69 ms / 313 runs ( 15.78 ms per run)\n",
248
+ "whisper_print_timings: prompt time = 0.00 ms / 1 runs ( 0.00 ms per run)\n",
249
+ "whisper_print_timings: total time = 12942.70 ms\n"
250
+ ]
251
+ }
252
+ ]
253
+ },
254
+ {
255
+ "cell_type": "code",
256
+ "source": [],
257
+ "metadata": {
258
+ "id": "QN0WRNiY3snj"
259
+ },
260
+ "execution_count": null,
261
+ "outputs": []
262
+ },
263
+ {
264
+ "cell_type": "code",
265
+ "source": [],
266
+ "metadata": {
267
+ "id": "eIUmVDHn3skk"
268
+ },
269
+ "execution_count": null,
270
+ "outputs": []
271
+ }
272
+ ]
273
+ }