Caleb Ellington
committed on
Commit
·
a0592b9
1
Parent(s):
9626f4f
update with major refactor
Browse files
- README.md +3 -3
- config.yaml +5 -5
README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
from huggingface_hub import snapshot_download
|
5 |
from pathlib import Path
|
6 |
|
7 |
-
model_name = "genbio-ai/
|
8 |
genbio_models_path = Path.home().joinpath('genbio_models', model_name)
|
9 |
genbio_models_path.mkdir(parents=True, exist_ok=True)
|
10 |
snapshot_download(repo_id=model_name, local_dir=genbio_models_path)
|
@@ -12,12 +12,12 @@ snapshot_download(repo_id=model_name, local_dir=genbio_models_path)
|
|
12 |
### Load model for inference
|
13 |
```python
|
14 |
import torch
|
15 |
-
from
|
16 |
|
17 |
ckpt_path = genbio_models_path.joinpath('model.ckpt')
|
18 |
model = SequenceClassification.load_from_checkpoint(ckpt_path, strict_loading=False).eval()
|
19 |
|
20 |
-
collated_batch = model.
|
21 |
logits = model(collated_batch)
|
22 |
print(logits)
|
23 |
print(torch.argmax(logits, dim=-1))
|
|
|
4 |
from huggingface_hub import snapshot_download
|
5 |
from pathlib import Path
|
6 |
|
7 |
+
model_name = "genbio-ai/aido_dna_7b-nt-promoter-all-ckpt"
|
8 |
genbio_models_path = Path.home().joinpath('genbio_models', model_name)
|
9 |
genbio_models_path.mkdir(parents=True, exist_ok=True)
|
10 |
snapshot_download(repo_id=model_name, local_dir=genbio_models_path)
|
|
|
12 |
### Load model for inference
|
13 |
```python
|
14 |
import torch
|
15 |
+
from modelgenerator.tasks import SequenceClassification
|
16 |
|
17 |
ckpt_path = genbio_models_path.joinpath('model.ckpt')
|
18 |
model = SequenceClassification.load_from_checkpoint(ckpt_path, strict_loading=False).eval()
|
19 |
|
20 |
+
collated_batch = model.transform({"sequences": ["ACGT", "AGCT"]})
|
21 |
logits = model(collated_batch)
|
22 |
print(logits)
|
23 |
print(torch.argmax(logits, dim=-1))
|
config.yaml
CHANGED
@@ -141,10 +141,10 @@ trainer:
|
|
141 |
reload_dataloaders_every_n_epochs: 0
|
142 |
default_root_dir: logs
|
143 |
model:
|
144 |
-
class_path:
|
145 |
init_args:
|
146 |
backbone:
|
147 |
-
class_path:
|
148 |
init_args:
|
149 |
from_scratch: false
|
150 |
use_peft: true
|
@@ -155,7 +155,7 @@ model:
|
|
155 |
config_overwrites: null
|
156 |
model_init_args: null
|
157 |
max_length: 302
|
158 |
-
adapter:
|
159 |
n_classes: 2
|
160 |
optimizer:
|
161 |
class_path: torch.optim.AdamW
|
@@ -173,14 +173,14 @@ model:
|
|
173 |
differentiable: false
|
174 |
fused: null
|
175 |
lr_scheduler:
|
176 |
-
class_path:
|
177 |
init_args:
|
178 |
warmup_ratio: 0.1
|
179 |
use_legacy_adapter: false
|
180 |
strict_loading: true
|
181 |
reset_optimizer_states: false
|
182 |
data:
|
183 |
-
class_path:
|
184 |
init_args:
|
185 |
hf_name: InstaDeepAI/nucleotide_transformer_downstream_tasks
|
186 |
task: promoter_all
|
|
|
141 |
reload_dataloaders_every_n_epochs: 0
|
142 |
default_root_dir: logs
|
143 |
model:
|
144 |
+
class_path: modelgenerator.tasks.SequenceClassification
|
145 |
init_args:
|
146 |
backbone:
|
147 |
+
class_path: modelgenerator.backbones.aido_dna_7b
|
148 |
init_args:
|
149 |
from_scratch: false
|
150 |
use_peft: true
|
|
|
155 |
config_overwrites: null
|
156 |
model_init_args: null
|
157 |
max_length: 302
|
158 |
+
adapter: modelgenerator.adapters.LinearCLSAdapter
|
159 |
n_classes: 2
|
160 |
optimizer:
|
161 |
class_path: torch.optim.AdamW
|
|
|
173 |
differentiable: false
|
174 |
fused: null
|
175 |
lr_scheduler:
|
176 |
+
class_path: modelgenerator.lr_schedulers.CosineWithWarmup
|
177 |
init_args:
|
178 |
warmup_ratio: 0.1
|
179 |
use_legacy_adapter: false
|
180 |
strict_loading: true
|
181 |
reset_optimizer_states: false
|
182 |
data:
|
183 |
+
class_path: modelgenerator.data.NTClassification
|
184 |
init_args:
|
185 |
hf_name: InstaDeepAI/nucleotide_transformer_downstream_tasks
|
186 |
task: promoter_all
|