Update elm/model.py
elm/model.py  +5 -7
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, SliceX AI, Inc.
+# Copyright (c) 2024, SliceX AI, Inc.
 
 import copy
 import inspect
@@ -100,15 +100,12 @@ class ELM(torch.nn.Module):
         else:
             x = self.slice_transformer.drop(tok_emb)
 
-        tlayer_id = 0
         ignore_index_id = -100
         loss = torch.zeros(1).to(device)
         loss_denom = 0
 
         for tlayer in self.slice_transformer.h:
             x = tlayer(x, attention_mask=attention_mask)
-
-            tlayer_id += 1
 
         x = self.slice_transformer.ln_f(x)
 
@@ -133,9 +130,8 @@ class ELM(torch.nn.Module):
     def get_num_params(self, non_embedding=True):
         """
         Return the number of parameters in the model.
-        For non-embedding count (default),
-
-        If there is no parameter sharing , set the flag to False to include parameters for both layers.
+        For non-embedding count (default), subtract position embeddings if parameter tying applies.
+        If there is no parameter sharing, set the flag to False to include parameters for both input/output layers.
         """
         n_params = sum(p.numel() for p in self.parameters())
         if non_embedding and not self.model_args.use_rotary_embeddings:
@@ -342,6 +338,8 @@ def init_elm_model(model_args=ModelArgs(), device="cuda", model_config_dict=None
         model_args = ModelArgs(**model_config_dict)
 
     dtype = torch.bfloat16 if device=="cuda" and torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16
+    if not torch.cuda.is_available():
+        dtype = torch.bfloat16
 
     model = ELM(model_args=model_args).to(dtype=dtype)
 
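For reference, a minimal sketch of the dtype selection in init_elm_model as it reads after this change. The helper name select_dtype is hypothetical and introduced here only for illustration; in the actual file the logic runs inline inside init_elm_model.

    import torch

    def select_dtype(device: str = "cuda") -> torch.dtype:
        # Prefer bfloat16 on CUDA when the GPU supports it, otherwise float16.
        dtype = (
            torch.bfloat16
            if device == "cuda" and torch.cuda.is_available() and torch.cuda.is_bf16_supported()
            else torch.float16
        )
        # Added by this commit: without CUDA, fall back to bfloat16 rather than float16.
        if not torch.cuda.is_available():
            dtype = torch.bfloat16
        return dtype

Net effect of the two added lines: CPU-only environments now get torch.bfloat16 instead of torch.float16. The commit itself does not state a motivation, but bfloat16 generally has broader CPU operator coverage in PyTorch than float16.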