Spaces:
Paused
Paused
Gor Solomon
committed on
Commit
·
69b0f0d
1
Parent(s):
472c2c3
init
Browse files- Dockerfile +16 -0
- LocalDatasets/beyond_good_and_evil/dataset_dict.json +1 -0
- LocalDatasets/beyond_good_and_evil/test/cache-08265684f5a37e0d.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/test/cache-34c1ec675eccb1e5.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/test/cache-380da2e5dcef03df.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/test/cache-3c43ca8d6e3d0f31.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/test/cache-50cafbc6851e4d0e.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/test/cache-5924277b48bb9843.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/test/cache-73a969c8f7aa08ad.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/test/cache-d2bd62e16d6d96c8.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/test/cache-dc890e014966478c.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/test/cache-e5cdd0baa5bb87f5.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/test/cache-ec995eab82e9c468.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/test/cache-f03debb74dbeb867.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/test/data-00000-of-00001.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/test/dataset_info.json +52 -0
- LocalDatasets/beyond_good_and_evil/test/state.json +13 -0
- LocalDatasets/beyond_good_and_evil/train/cache-1d1e41ee9bb12a11.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/train/cache-27e83bf40614a2f4.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/train/cache-30ee22778b46cd81.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/train/cache-3a5ee93dc239c198.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/train/cache-4b64f7d492377576.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/train/cache-91b75d3a40905d78.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/train/cache-b8262b23ba04f273.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/train/cache-fb60a1da0ad06f2d.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/train/data-00000-of-00001.arrow +3 -0
- LocalDatasets/beyond_good_and_evil/train/dataset_info.json +52 -0
- LocalDatasets/beyond_good_and_evil/train/state.json +13 -0
- app.py +102 -0
- requirements.txt +0 -0
Dockerfile
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
|
2 |
+
# you will also find guides on how best to write your Dockerfile
|
3 |
+
|
4 |
+
FROM python:3.9
|
5 |
+
|
6 |
+
RUN useradd -m -u 1000 user
|
7 |
+
USER user
|
8 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
9 |
+
|
10 |
+
WORKDIR /app
|
11 |
+
|
12 |
+
COPY --chown=user ./requirements.txt requirements.txt
|
13 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
14 |
+
|
15 |
+
COPY --chown=user . /app
|
16 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
LocalDatasets/beyond_good_and_evil/dataset_dict.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"splits": ["train", "test"]}
|
LocalDatasets/beyond_good_and_evil/test/cache-08265684f5a37e0d.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3abc479c918e2a772a21e3bc756e02e3e47f301a268a7fb1011544ef947ba15c
|
3 |
+
size 177792
|
LocalDatasets/beyond_good_and_evil/test/cache-34c1ec675eccb1e5.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9116a9e3c763d1f2c586938e7b7ec3693609f4280806bbe9288a3bb1a837096c
|
3 |
+
size 177792
|
LocalDatasets/beyond_good_and_evil/test/cache-380da2e5dcef03df.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a946577cf4be8de195469876fcba14da1072537f97e04489ec8e40945c15acd1
|
3 |
+
size 403776
|
LocalDatasets/beyond_good_and_evil/test/cache-3c43ca8d6e3d0f31.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74b37d146ef1a3d9978cfd2168b4132f4f65b2225b6e0d06ac2c8be802c19dfa
|
3 |
+
size 206984
|
LocalDatasets/beyond_good_and_evil/test/cache-50cafbc6851e4d0e.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:691efdee0462024312a83a1d8eae40ddbb973792d0bde1e0eb5a731a3a0c9c01
|
3 |
+
size 206984
|
LocalDatasets/beyond_good_and_evil/test/cache-5924277b48bb9843.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7219682d55c8cfed0f12c9c764964428cf0986c496872447c15ca441fec875e3
|
3 |
+
size 374584
|
LocalDatasets/beyond_good_and_evil/test/cache-73a969c8f7aa08ad.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a75dcfc0e6d8e68ea6811ebd24e184d1e89f0d9e459339d419fd79b36d4cfdf5
|
3 |
+
size 173896
|
LocalDatasets/beyond_good_and_evil/test/cache-d2bd62e16d6d96c8.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39b78aba2ee8f0758a235ea596a7c4c353e47677c4b96b34b5e306f4060d668a
|
3 |
+
size 374584
|
LocalDatasets/beyond_good_and_evil/test/cache-dc890e014966478c.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0b3d0a5f17522e788370ffcb8d0f3cf0825f92909023f1d6483965761492a8e
|
3 |
+
size 374584
|
LocalDatasets/beyond_good_and_evil/test/cache-e5cdd0baa5bb87f5.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50b64cb28a96027d90ce155d571c573ce4a0c4b3abdd3a1ae857c5f1c6b3a409
|
3 |
+
size 206984
|
LocalDatasets/beyond_good_and_evil/test/cache-ec995eab82e9c468.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe7718b13ad450da4ebbd1167e5848afe38bc34f0203ae77fb5108c5c775c0d0
|
3 |
+
size 206984
|
LocalDatasets/beyond_good_and_evil/test/cache-f03debb74dbeb867.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b5dffa1345ad7c58b1a130599cb0e70774f12d7c39913bb3033d1b12d6eb050
|
3 |
+
size 403776
|
LocalDatasets/beyond_good_and_evil/test/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd4cdaf68643f9c6c193543cd887df730e79002944da33696d8773cde7ebad9c
|
3 |
+
size 33352
|
LocalDatasets/beyond_good_and_evil/test/dataset_info.json
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"builder_name": "csv",
|
3 |
+
"citation": "",
|
4 |
+
"config_name": "default",
|
5 |
+
"dataset_name": "neitzsche_beyond_good_and_evil_convo",
|
6 |
+
"dataset_size": 411026,
|
7 |
+
"description": "",
|
8 |
+
"download_checksums": {
|
9 |
+
"hf://datasets/Augustya07/neitzsche_beyond_good_and_evil_convo@f76c977dfb7903f1dccaef6d592fb877538c357d/train.csv": {
|
10 |
+
"num_bytes": 377855,
|
11 |
+
"checksum": null
|
12 |
+
},
|
13 |
+
"hf://datasets/Augustya07/neitzsche_beyond_good_and_evil_convo@f76c977dfb7903f1dccaef6d592fb877538c357d/test.csv": {
|
14 |
+
"num_bytes": 32660,
|
15 |
+
"checksum": null
|
16 |
+
}
|
17 |
+
},
|
18 |
+
"download_size": 410515,
|
19 |
+
"features": {
|
20 |
+
"prompt": {
|
21 |
+
"dtype": "string",
|
22 |
+
"_type": "Value"
|
23 |
+
},
|
24 |
+
"completion": {
|
25 |
+
"dtype": "string",
|
26 |
+
"_type": "Value"
|
27 |
+
}
|
28 |
+
},
|
29 |
+
"homepage": "",
|
30 |
+
"license": "",
|
31 |
+
"size_in_bytes": 821541,
|
32 |
+
"splits": {
|
33 |
+
"train": {
|
34 |
+
"name": "train",
|
35 |
+
"num_bytes": 378283,
|
36 |
+
"num_examples": 458,
|
37 |
+
"dataset_name": "neitzsche_beyond_good_and_evil_convo"
|
38 |
+
},
|
39 |
+
"test": {
|
40 |
+
"name": "test",
|
41 |
+
"num_bytes": 32743,
|
42 |
+
"num_examples": 56,
|
43 |
+
"dataset_name": "neitzsche_beyond_good_and_evil_convo"
|
44 |
+
}
|
45 |
+
},
|
46 |
+
"version": {
|
47 |
+
"version_str": "0.0.0",
|
48 |
+
"major": 0,
|
49 |
+
"minor": 0,
|
50 |
+
"patch": 0
|
51 |
+
}
|
52 |
+
}
|
LocalDatasets/beyond_good_and_evil/test/state.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00001.arrow"
|
5 |
+
}
|
6 |
+
],
|
7 |
+
"_fingerprint": "ea422c8bc997ca86",
|
8 |
+
"_format_columns": null,
|
9 |
+
"_format_kwargs": {},
|
10 |
+
"_format_type": null,
|
11 |
+
"_output_all_columns": false,
|
12 |
+
"_split": "test"
|
13 |
+
}
|
LocalDatasets/beyond_good_and_evil/train/cache-1d1e41ee9bb12a11.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9882469aac4f7f2a13181b096a936c8dbd055cfe65d030f69a1c8f5ba02d148d
|
3 |
+
size 3055120
|
LocalDatasets/beyond_good_and_evil/train/cache-27e83bf40614a2f4.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9be3beed8e6cda2c0200eb7e100be9654125524fabe501147f6625f27d8563e
|
3 |
+
size 1792288
|
LocalDatasets/beyond_good_and_evil/train/cache-30ee22778b46cd81.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ec0e38c49f220ce96db5dec3b766c7adf497bde2b607f98d872b6453f986e50
|
3 |
+
size 1413664
|
LocalDatasets/beyond_good_and_evil/train/cache-3a5ee93dc239c198.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f30113cb0dd64df73574b1dce8476756b0b5143fd1907103c8d82302a3efbba1
|
3 |
+
size 3055120
|
LocalDatasets/beyond_good_and_evil/train/cache-4b64f7d492377576.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02e2d7a4557649989a9d14772ff0a3ed585f76191045f78cbbc91e4483aa4618
|
3 |
+
size 3291744
|
LocalDatasets/beyond_good_and_evil/train/cache-91b75d3a40905d78.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9cd1f47b95e3648167636ce42917b33fd9fbcec8e71afdd6acda59584a9dbf3
|
3 |
+
size 1555664
|
LocalDatasets/beyond_good_and_evil/train/cache-b8262b23ba04f273.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a0be3ec476e7a8c296f5a96a6e5805232a5329dbaf0652e6c0c94c9cfa3ab0b
|
3 |
+
size 1792288
|
LocalDatasets/beyond_good_and_evil/train/cache-fb60a1da0ad06f2d.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:981faa71a2bc528a525658ae2b97602b8b37b542f33a7a5a634423e8e9902aae
|
3 |
+
size 3291744
|
LocalDatasets/beyond_good_and_evil/train/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4889c31e70d9aadc5f08a297e23f67cd00657ba58f1cf9ee2fb7a382f930f440
|
3 |
+
size 378888
|
LocalDatasets/beyond_good_and_evil/train/dataset_info.json
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"builder_name": "csv",
|
3 |
+
"citation": "",
|
4 |
+
"config_name": "default",
|
5 |
+
"dataset_name": "neitzsche_beyond_good_and_evil_convo",
|
6 |
+
"dataset_size": 411026,
|
7 |
+
"description": "",
|
8 |
+
"download_checksums": {
|
9 |
+
"hf://datasets/Augustya07/neitzsche_beyond_good_and_evil_convo@f76c977dfb7903f1dccaef6d592fb877538c357d/train.csv": {
|
10 |
+
"num_bytes": 377855,
|
11 |
+
"checksum": null
|
12 |
+
},
|
13 |
+
"hf://datasets/Augustya07/neitzsche_beyond_good_and_evil_convo@f76c977dfb7903f1dccaef6d592fb877538c357d/test.csv": {
|
14 |
+
"num_bytes": 32660,
|
15 |
+
"checksum": null
|
16 |
+
}
|
17 |
+
},
|
18 |
+
"download_size": 410515,
|
19 |
+
"features": {
|
20 |
+
"prompt": {
|
21 |
+
"dtype": "string",
|
22 |
+
"_type": "Value"
|
23 |
+
},
|
24 |
+
"completion": {
|
25 |
+
"dtype": "string",
|
26 |
+
"_type": "Value"
|
27 |
+
}
|
28 |
+
},
|
29 |
+
"homepage": "",
|
30 |
+
"license": "",
|
31 |
+
"size_in_bytes": 821541,
|
32 |
+
"splits": {
|
33 |
+
"train": {
|
34 |
+
"name": "train",
|
35 |
+
"num_bytes": 378283,
|
36 |
+
"num_examples": 458,
|
37 |
+
"dataset_name": "neitzsche_beyond_good_and_evil_convo"
|
38 |
+
},
|
39 |
+
"test": {
|
40 |
+
"name": "test",
|
41 |
+
"num_bytes": 32743,
|
42 |
+
"num_examples": 56,
|
43 |
+
"dataset_name": "neitzsche_beyond_good_and_evil_convo"
|
44 |
+
}
|
45 |
+
},
|
46 |
+
"version": {
|
47 |
+
"version_str": "0.0.0",
|
48 |
+
"major": 0,
|
49 |
+
"minor": 0,
|
50 |
+
"patch": 0
|
51 |
+
}
|
52 |
+
}
|
LocalDatasets/beyond_good_and_evil/train/state.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00001.arrow"
|
5 |
+
}
|
6 |
+
],
|
7 |
+
"_fingerprint": "a68dbd9306ce0628",
|
8 |
+
"_format_columns": null,
|
9 |
+
"_format_kwargs": {},
|
10 |
+
"_format_type": null,
|
11 |
+
"_output_all_columns": false,
|
12 |
+
"_split": "train"
|
13 |
+
}
|
app.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datasets import load_from_disk
|
2 |
+
from fastapi import FastAPI
|
3 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
|
4 |
+
import evaluate
|
5 |
+
import numpy as np
|
6 |
+
import torch
|
7 |
+
|
8 |
+
# Load the dataset
|
9 |
+
dataset = load_from_disk('LocalDatasets/beyond_good_and_evil')
|
10 |
+
|
11 |
+
# Initialize the tokenizer
|
12 |
+
checkpoint = "gpt2"
|
13 |
+
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
|
14 |
+
tokenizer.pad_token = tokenizer.eos_token
|
15 |
+
|
16 |
+
# Tokenize the dataset
|
17 |
+
def tokenize_function(example):
    """Encode prompt/completion pairs as fixed-length token sequences.

    Args:
        example: A batch mapping with 'prompt' and 'completion' entries,
            as passed by ``dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output for the (prompt, completion) pairs, truncated
        and padded to 512 tokens.
    """
    prompts = example['prompt']
    completions = example['completion']
    return tokenizer(
        prompts,
        completions,
        truncation=True,
        padding='max_length',
        max_length=512,
    )
|
19 |
+
|
20 |
+
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
21 |
+
|
22 |
+
# Prepare the dataset for training
|
23 |
+
def preprocess_function(examples):
    """Build causal-LM training features from prompt/completion pairs.

    The prompt and its completion are encoded together as one sequence so
    that the model is actually trained to continue the prompt with the
    completion. (Encoding the prompt alone as ``input_ids`` and the
    completion alone as ``labels`` pairs unrelated positions, and the
    language-modeling collator rebuilds labels from ``input_ids`` anyway,
    which left the completions unused.)

    Args:
        examples: A batch mapping with 'prompt' and 'completion' entries,
            as passed by ``dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output (truncated/padded to 512 tokens) with an added
        "labels" entry: a copy of ``input_ids`` where padded positions are
        replaced by -100 so they are ignored by the loss.
    """
    model_inputs = tokenizer(
        examples['prompt'],
        examples['completion'],
        truncation=True,
        padding='max_length',
        max_length=512,
    )

    # The pad token is the EOS token in this script, so padding is identified
    # through the attention mask rather than by token id.
    model_inputs["labels"] = [
        [token_id if is_real else -100 for token_id, is_real in zip(ids, mask)]
        for ids, mask in zip(model_inputs["input_ids"], model_inputs["attention_mask"])
    ]
    return model_inputs
|
29 |
+
|
30 |
+
tokenized_datasets = tokenized_datasets.map(preprocess_function, batched=True, remove_columns=['prompt', 'completion'])
|
31 |
+
|
32 |
+
# Initialize the data collator
|
33 |
+
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
|
34 |
+
|
35 |
+
# Split the dataset
|
36 |
+
train_dataset = tokenized_datasets['train']
|
37 |
+
eval_dataset = tokenized_datasets['test']
|
38 |
+
|
39 |
+
# Load the model
|
40 |
+
model = AutoModelForCausalLM.from_pretrained(checkpoint)
|
41 |
+
|
42 |
+
# Define training arguments
|
43 |
+
training_args = TrainingArguments(
|
44 |
+
output_dir='./results',
|
45 |
+
evaluation_strategy='epoch',
|
46 |
+
learning_rate=1e-5,
|
47 |
+
per_device_train_batch_size=4, # 4 examples per device per training step
|
48 |
+
per_device_eval_batch_size=4, # 4 examples per device per evaluation step
|
49 |
+
num_train_epochs=90,
|
50 |
+
weight_decay=0.01,
|
51 |
+
save_total_limit=2,
|
52 |
+
)
|
53 |
+
|
54 |
+
# Define the compute metrics function
|
55 |
+
metric = evaluate.load("accuracy")
|
56 |
+
|
57 |
+
def compute_metrics(eval_pred):
    """Compute next-token accuracy for causal language-model predictions.

    Args:
        eval_pred: A ``(logits, labels)`` pair. ``logits`` has a trailing
            vocabulary axis; ``labels`` holds token ids with -100 marking
            positions to ignore. ``logits`` may also be a tuple whose first
            element is the logits array.

    Returns:
        ``{"accuracy": value}`` where ``value`` is the fraction of
        non-ignored label tokens predicted correctly (0.0 when every
        position is ignored).
    """
    logits, labels = eval_pred
    if isinstance(logits, tuple):
        logits = logits[0]

    predictions = np.argmax(np.asarray(logits), axis=-1)
    labels = np.asarray(labels)

    # A causal LM predicts token t+1 at position t, so compare the
    # predictions at positions [0, n-1) with the labels at positions [1, n).
    shifted_predictions = predictions[..., :-1]
    shifted_labels = labels[..., 1:]

    # -100 marks padded/ignored positions; they must not count as errors.
    scored = shifted_labels != -100
    scored_count = int(scored.sum())
    if scored_count == 0:
        return {"accuracy": 0.0}

    correct = int((shifted_predictions[scored] == shifted_labels[scored]).sum())
    return {"accuracy": correct / scored_count}
|
61 |
+
|
62 |
+
# Initialize the Trainer
|
63 |
+
trainer = Trainer(
|
64 |
+
model=model,
|
65 |
+
args=training_args,
|
66 |
+
train_dataset=train_dataset,
|
67 |
+
eval_dataset=eval_dataset,
|
68 |
+
tokenizer=tokenizer,
|
69 |
+
data_collator=data_collator,
|
70 |
+
compute_metrics=compute_metrics,
|
71 |
+
)
|
72 |
+
|
73 |
+
# Train the model
|
74 |
+
trainer.train()
|
75 |
+
|
76 |
+
# Save the model and tokenizer
|
77 |
+
model.save_pretrained('./saved_model')
|
78 |
+
tokenizer.save_pretrained('./saved_model')
|
79 |
+
|
80 |
+
# Load the model and tokenizer for inference
|
81 |
+
model = AutoModelForCausalLM.from_pretrained('./saved_model')
|
82 |
+
tokenizer = AutoTokenizer.from_pretrained('./saved_model')
|
83 |
+
|
84 |
+
# Example inference
|
85 |
+
example_prompt = "What is the main idea of Nietzsche's philosophy?"
|
86 |
+
|
87 |
+
inputs = tokenizer(example_prompt, return_tensors='pt', truncation=True, padding=True, max_length=512)
|
88 |
+
with torch.no_grad():
|
89 |
+
outputs = model.generate(**inputs, max_length=100, num_beams=5, early_stopping=True)
|
90 |
+
completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
91 |
+
|
92 |
+
print(f"Generated completion: {completion}")
|
93 |
+
|
94 |
+
app = FastAPI()
|
95 |
+
|
96 |
+
@app.get("/{param_prompt}")
def greet_json(param_prompt: str = "Friedrich Nietzsche, I have just started reading your work and I must say, it is quite thought-provoking. I am intrigued by your concept of the 'Will to Truth.' Can you explain to me what this means?"):
    """Generate a completion for the prompt supplied in the URL path.

    Args:
        param_prompt: The prompt text taken from the request path.

    Returns:
        A JSON-serializable dict with the decoded generation stored under
        the "Answer: " key.
    """
    f_inputs = tokenizer(param_prompt, return_tensors='pt', truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        f_outputs = model.generate(
            **f_inputs,
            # Budget the generated tokens only. A total-length cap of 200
            # would leave no room to generate once the prompt itself (which
            # may be up to 512 tokens) reaches that length.
            max_new_tokens=200,
            num_beams=5,
            early_stopping=True,
            # The script sets pad_token = eos_token on the tokenizer it
            # trains with; pass the id explicitly so generation has a
            # defined padding token.
            pad_token_id=tokenizer.eos_token_id,
        )
    f_completion = tokenizer.decode(f_outputs[0], skip_special_tokens=True)
    return {"Answer: ": f_completion}
|
requirements.txt
ADDED
Binary file (2.09 kB). View file
|
|