Gor Solomon committed on
Commit
69b0f0d
·
1 Parent(s): 472c2c3
Files changed (30) hide show
  1. Dockerfile +16 -0
  2. LocalDatasets/beyond_good_and_evil/dataset_dict.json +1 -0
  3. LocalDatasets/beyond_good_and_evil/test/cache-08265684f5a37e0d.arrow +3 -0
  4. LocalDatasets/beyond_good_and_evil/test/cache-34c1ec675eccb1e5.arrow +3 -0
  5. LocalDatasets/beyond_good_and_evil/test/cache-380da2e5dcef03df.arrow +3 -0
  6. LocalDatasets/beyond_good_and_evil/test/cache-3c43ca8d6e3d0f31.arrow +3 -0
  7. LocalDatasets/beyond_good_and_evil/test/cache-50cafbc6851e4d0e.arrow +3 -0
  8. LocalDatasets/beyond_good_and_evil/test/cache-5924277b48bb9843.arrow +3 -0
  9. LocalDatasets/beyond_good_and_evil/test/cache-73a969c8f7aa08ad.arrow +3 -0
  10. LocalDatasets/beyond_good_and_evil/test/cache-d2bd62e16d6d96c8.arrow +3 -0
  11. LocalDatasets/beyond_good_and_evil/test/cache-dc890e014966478c.arrow +3 -0
  12. LocalDatasets/beyond_good_and_evil/test/cache-e5cdd0baa5bb87f5.arrow +3 -0
  13. LocalDatasets/beyond_good_and_evil/test/cache-ec995eab82e9c468.arrow +3 -0
  14. LocalDatasets/beyond_good_and_evil/test/cache-f03debb74dbeb867.arrow +3 -0
  15. LocalDatasets/beyond_good_and_evil/test/data-00000-of-00001.arrow +3 -0
  16. LocalDatasets/beyond_good_and_evil/test/dataset_info.json +52 -0
  17. LocalDatasets/beyond_good_and_evil/test/state.json +13 -0
  18. LocalDatasets/beyond_good_and_evil/train/cache-1d1e41ee9bb12a11.arrow +3 -0
  19. LocalDatasets/beyond_good_and_evil/train/cache-27e83bf40614a2f4.arrow +3 -0
  20. LocalDatasets/beyond_good_and_evil/train/cache-30ee22778b46cd81.arrow +3 -0
  21. LocalDatasets/beyond_good_and_evil/train/cache-3a5ee93dc239c198.arrow +3 -0
  22. LocalDatasets/beyond_good_and_evil/train/cache-4b64f7d492377576.arrow +3 -0
  23. LocalDatasets/beyond_good_and_evil/train/cache-91b75d3a40905d78.arrow +3 -0
  24. LocalDatasets/beyond_good_and_evil/train/cache-b8262b23ba04f273.arrow +3 -0
  25. LocalDatasets/beyond_good_and_evil/train/cache-fb60a1da0ad06f2d.arrow +3 -0
  26. LocalDatasets/beyond_good_and_evil/train/data-00000-of-00001.arrow +3 -0
  27. LocalDatasets/beyond_good_and_evil/train/dataset_info.json +52 -0
  28. LocalDatasets/beyond_good_and_evil/train/state.json +13 -0
  29. app.py +102 -0
  30. requirements.txt +0 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.9
5
+
6
+ RUN useradd -m -u 1000 user
7
+ USER user
8
+ ENV PATH="/home/user/.local/bin:$PATH"
9
+
10
+ WORKDIR /app
11
+
12
+ COPY --chown=user ./requirements.txt requirements.txt
13
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
+
15
+ COPY --chown=user . /app
16
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
LocalDatasets/beyond_good_and_evil/dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train", "test"]}
LocalDatasets/beyond_good_and_evil/test/cache-08265684f5a37e0d.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3abc479c918e2a772a21e3bc756e02e3e47f301a268a7fb1011544ef947ba15c
3
+ size 177792
LocalDatasets/beyond_good_and_evil/test/cache-34c1ec675eccb1e5.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9116a9e3c763d1f2c586938e7b7ec3693609f4280806bbe9288a3bb1a837096c
3
+ size 177792
LocalDatasets/beyond_good_and_evil/test/cache-380da2e5dcef03df.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a946577cf4be8de195469876fcba14da1072537f97e04489ec8e40945c15acd1
3
+ size 403776
LocalDatasets/beyond_good_and_evil/test/cache-3c43ca8d6e3d0f31.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74b37d146ef1a3d9978cfd2168b4132f4f65b2225b6e0d06ac2c8be802c19dfa
3
+ size 206984
LocalDatasets/beyond_good_and_evil/test/cache-50cafbc6851e4d0e.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:691efdee0462024312a83a1d8eae40ddbb973792d0bde1e0eb5a731a3a0c9c01
3
+ size 206984
LocalDatasets/beyond_good_and_evil/test/cache-5924277b48bb9843.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7219682d55c8cfed0f12c9c764964428cf0986c496872447c15ca441fec875e3
3
+ size 374584
LocalDatasets/beyond_good_and_evil/test/cache-73a969c8f7aa08ad.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a75dcfc0e6d8e68ea6811ebd24e184d1e89f0d9e459339d419fd79b36d4cfdf5
3
+ size 173896
LocalDatasets/beyond_good_and_evil/test/cache-d2bd62e16d6d96c8.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39b78aba2ee8f0758a235ea596a7c4c353e47677c4b96b34b5e306f4060d668a
3
+ size 374584
LocalDatasets/beyond_good_and_evil/test/cache-dc890e014966478c.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0b3d0a5f17522e788370ffcb8d0f3cf0825f92909023f1d6483965761492a8e
3
+ size 374584
LocalDatasets/beyond_good_and_evil/test/cache-e5cdd0baa5bb87f5.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50b64cb28a96027d90ce155d571c573ce4a0c4b3abdd3a1ae857c5f1c6b3a409
3
+ size 206984
LocalDatasets/beyond_good_and_evil/test/cache-ec995eab82e9c468.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe7718b13ad450da4ebbd1167e5848afe38bc34f0203ae77fb5108c5c775c0d0
3
+ size 206984
LocalDatasets/beyond_good_and_evil/test/cache-f03debb74dbeb867.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b5dffa1345ad7c58b1a130599cb0e70774f12d7c39913bb3033d1b12d6eb050
3
+ size 403776
LocalDatasets/beyond_good_and_evil/test/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd4cdaf68643f9c6c193543cd887df730e79002944da33696d8773cde7ebad9c
3
+ size 33352
LocalDatasets/beyond_good_and_evil/test/dataset_info.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "csv",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "neitzsche_beyond_good_and_evil_convo",
6
+ "dataset_size": 411026,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/Augustya07/neitzsche_beyond_good_and_evil_convo@f76c977dfb7903f1dccaef6d592fb877538c357d/train.csv": {
10
+ "num_bytes": 377855,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/Augustya07/neitzsche_beyond_good_and_evil_convo@f76c977dfb7903f1dccaef6d592fb877538c357d/test.csv": {
14
+ "num_bytes": 32660,
15
+ "checksum": null
16
+ }
17
+ },
18
+ "download_size": 410515,
19
+ "features": {
20
+ "prompt": {
21
+ "dtype": "string",
22
+ "_type": "Value"
23
+ },
24
+ "completion": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ }
28
+ },
29
+ "homepage": "",
30
+ "license": "",
31
+ "size_in_bytes": 821541,
32
+ "splits": {
33
+ "train": {
34
+ "name": "train",
35
+ "num_bytes": 378283,
36
+ "num_examples": 458,
37
+ "dataset_name": "neitzsche_beyond_good_and_evil_convo"
38
+ },
39
+ "test": {
40
+ "name": "test",
41
+ "num_bytes": 32743,
42
+ "num_examples": 56,
43
+ "dataset_name": "neitzsche_beyond_good_and_evil_convo"
44
+ }
45
+ },
46
+ "version": {
47
+ "version_str": "0.0.0",
48
+ "major": 0,
49
+ "minor": 0,
50
+ "patch": 0
51
+ }
52
+ }
LocalDatasets/beyond_good_and_evil/test/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "ea422c8bc997ca86",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "test"
13
+ }
LocalDatasets/beyond_good_and_evil/train/cache-1d1e41ee9bb12a11.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9882469aac4f7f2a13181b096a936c8dbd055cfe65d030f69a1c8f5ba02d148d
3
+ size 3055120
LocalDatasets/beyond_good_and_evil/train/cache-27e83bf40614a2f4.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9be3beed8e6cda2c0200eb7e100be9654125524fabe501147f6625f27d8563e
3
+ size 1792288
LocalDatasets/beyond_good_and_evil/train/cache-30ee22778b46cd81.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ec0e38c49f220ce96db5dec3b766c7adf497bde2b607f98d872b6453f986e50
3
+ size 1413664
LocalDatasets/beyond_good_and_evil/train/cache-3a5ee93dc239c198.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f30113cb0dd64df73574b1dce8476756b0b5143fd1907103c8d82302a3efbba1
3
+ size 3055120
LocalDatasets/beyond_good_and_evil/train/cache-4b64f7d492377576.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02e2d7a4557649989a9d14772ff0a3ed585f76191045f78cbbc91e4483aa4618
3
+ size 3291744
LocalDatasets/beyond_good_and_evil/train/cache-91b75d3a40905d78.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9cd1f47b95e3648167636ce42917b33fd9fbcec8e71afdd6acda59584a9dbf3
3
+ size 1555664
LocalDatasets/beyond_good_and_evil/train/cache-b8262b23ba04f273.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a0be3ec476e7a8c296f5a96a6e5805232a5329dbaf0652e6c0c94c9cfa3ab0b
3
+ size 1792288
LocalDatasets/beyond_good_and_evil/train/cache-fb60a1da0ad06f2d.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:981faa71a2bc528a525658ae2b97602b8b37b542f33a7a5a634423e8e9902aae
3
+ size 3291744
LocalDatasets/beyond_good_and_evil/train/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4889c31e70d9aadc5f08a297e23f67cd00657ba58f1cf9ee2fb7a382f930f440
3
+ size 378888
LocalDatasets/beyond_good_and_evil/train/dataset_info.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "csv",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "neitzsche_beyond_good_and_evil_convo",
6
+ "dataset_size": 411026,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/Augustya07/neitzsche_beyond_good_and_evil_convo@f76c977dfb7903f1dccaef6d592fb877538c357d/train.csv": {
10
+ "num_bytes": 377855,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/Augustya07/neitzsche_beyond_good_and_evil_convo@f76c977dfb7903f1dccaef6d592fb877538c357d/test.csv": {
14
+ "num_bytes": 32660,
15
+ "checksum": null
16
+ }
17
+ },
18
+ "download_size": 410515,
19
+ "features": {
20
+ "prompt": {
21
+ "dtype": "string",
22
+ "_type": "Value"
23
+ },
24
+ "completion": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ }
28
+ },
29
+ "homepage": "",
30
+ "license": "",
31
+ "size_in_bytes": 821541,
32
+ "splits": {
33
+ "train": {
34
+ "name": "train",
35
+ "num_bytes": 378283,
36
+ "num_examples": 458,
37
+ "dataset_name": "neitzsche_beyond_good_and_evil_convo"
38
+ },
39
+ "test": {
40
+ "name": "test",
41
+ "num_bytes": 32743,
42
+ "num_examples": 56,
43
+ "dataset_name": "neitzsche_beyond_good_and_evil_convo"
44
+ }
45
+ },
46
+ "version": {
47
+ "version_str": "0.0.0",
48
+ "major": 0,
49
+ "minor": 0,
50
+ "patch": 0
51
+ }
52
+ }
LocalDatasets/beyond_good_and_evil/train/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "a68dbd9306ce0628",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "train"
13
+ }
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_from_disk
2
+ from fastapi import FastAPI
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
4
+ import evaluate
5
+ import numpy as np
6
+ import torch
7
+
8
+ # Load the dataset
9
+ dataset = load_from_disk('LocalDatasets/beyond_good_and_evil')
10
+
11
+ # Initialize the tokenizer
12
+ checkpoint = "gpt2"
13
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
14
+ tokenizer.pad_token = tokenizer.eos_token
15
+
16
+ # Tokenize the dataset
17
+ def tokenize_function(example):
18
+ return tokenizer(example['prompt'], example['completion'], truncation=True, padding='max_length', max_length=512)
19
+
20
+ tokenized_datasets = dataset.map(tokenize_function, batched=True)
21
+
22
+ # Prepare the dataset for training
23
+ def preprocess_function(examples):
24
+ inputs = tokenizer(examples['prompt'], truncation=True, padding='max_length', max_length=512)
25
+ outputs = tokenizer(examples['completion'], truncation=True, padding='max_length', max_length=512)
26
+
27
+ inputs["labels"] = outputs["input_ids"]
28
+ return inputs
29
+
30
+ tokenized_datasets = tokenized_datasets.map(preprocess_function, batched=True, remove_columns=['prompt', 'completion'])
31
+
32
+ # Initialize the data collator
33
+ data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
34
+
35
+ # Select the pre-existing train and test splits
36
+ train_dataset = tokenized_datasets['train']
37
+ eval_dataset = tokenized_datasets['test']
38
+
39
+ # Load the model
40
+ model = AutoModelForCausalLM.from_pretrained(checkpoint)
41
+
42
+ # Define training arguments
43
+ training_args = TrainingArguments(
44
+ output_dir='./results',
45
+ evaluation_strategy='epoch',
46
+ learning_rate=1e-5,
47
+ per_device_train_batch_size=4, # Per-device training batch size
48
+ per_device_eval_batch_size=4, # Per-device evaluation batch size
49
+ num_train_epochs=90,
50
+ weight_decay=0.01,
51
+ save_total_limit=2,
52
+ )
53
+
54
+ # Load the accuracy metric and define the compute-metrics function
55
+ metric = evaluate.load("accuracy")
56
+
57
+ def compute_metrics(eval_pred):
58
+ logits, labels = eval_pred
59
+ predictions = np.argmax(logits, axis=-1)
60
+ return metric.compute(predictions=predictions, references=labels)
61
+
62
+ # Initialize the Trainer
63
+ trainer = Trainer(
64
+ model=model,
65
+ args=training_args,
66
+ train_dataset=train_dataset,
67
+ eval_dataset=eval_dataset,
68
+ tokenizer=tokenizer,
69
+ data_collator=data_collator,
70
+ compute_metrics=compute_metrics,
71
+ )
72
+
73
+ # Train the model
74
+ trainer.train()
75
+
76
+ # Save the model and tokenizer
77
+ model.save_pretrained('./saved_model')
78
+ tokenizer.save_pretrained('./saved_model')
79
+
80
+ # Load the model and tokenizer for inference
81
+ model = AutoModelForCausalLM.from_pretrained('./saved_model')
82
+ tokenizer = AutoTokenizer.from_pretrained('./saved_model')
83
+
84
+ # Example inference
85
+ example_prompt = "What is the main idea of Nietzsche's philosophy?"
86
+
87
+ inputs = tokenizer(example_prompt, return_tensors='pt', truncation=True, padding=True, max_length=512)
88
+ with torch.no_grad():
89
+ outputs = model.generate(**inputs, max_length=100, num_beams=5, early_stopping=True)
90
+ completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
91
+
92
+ print(f"Generated completion: {completion}")
93
+
94
+ app = FastAPI()
95
+
96
+ @app.get("/{param_prompt}")
97
+ def greet_json(param_prompt: str = "Friedrich Nietzsche, I have just started reading your work and I must say, it is quite thought-provoking. I am intrigued by your concept of the 'Will to Truth.' Can you explain to me what this means?"):
98
+ f_inputs = tokenizer(param_prompt, return_tensors='pt', truncation=True, padding=True, max_length=512)
99
+ with torch.no_grad():
100
+ f_outputs = model.generate(**f_inputs, max_length=200, num_beams=5, early_stopping=True)
101
+ f_completion = tokenizer.decode(f_outputs[0], skip_special_tokens=True)
102
+ return {"Answer: ": f_completion}
requirements.txt ADDED
Binary file (2.09 kB). View file