---
library_name: transformers
tags: []
---
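
A minimal usage example follows. `trust_remote_code=True` is required, since the recurrent architecture is implemented in custom modeling code that ships with the checkpoint rather than in the `transformers` library itself.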
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# trust_remote_code=True is required to load the custom modeling code
model = AutoModelForCausalLM.from_pretrained("tomg-group-umd/step-00047360-recurrence_full_512_0", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("tomg-group-umd/step-00047360-recurrence_full_512_0")

device = torch.device("cuda:0")

# encode the prompt, dropping the last token of the encoding
input_ids = tokenizer.encode("The capital of Westphalia is", return_tensors="pt", add_special_tokens=True).to(device)[:, :-1]
model.eval()
model.to(device)

# plain forward pass in full precision
model(input_ids)
# or, more efficiently, run the forward pass under bfloat16 autocast
amp_settings = {"device_type": "cuda", "enabled": True, "dtype": torch.bfloat16}
if not amp_settings["enabled"]:
    # without mixed precision, enable the math attention kernel,
    # which supports full precision
    torch.backends.cuda.enable_math_sdp(True)

with torch.autocast(**amp_settings), torch.no_grad():
    model(input_ids=input_ids)

###### Caching:
# first step:
past_key_values = None
outputs = model(input_ids=input_ids, use_cache=True, past_key_values=past_key_values)
past_key_values = outputs.past_key_values
# next step: feed the cache back in
outputs = model(input_ids=input_ids, use_cache=True, past_key_values=past_key_values)
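
# A minimal sketch of incremental greedy decoding with the cache. Assumption:
# the custom cache follows the usual transformers convention of passing only
# the newest token once a cache exists.
generated = input_ids
past_key_values = None
with torch.autocast(**amp_settings), torch.no_grad():
    for _ in range(20):
        # full prompt on the first step, then one new token at a time
        step_input = generated if past_key_values is None else generated[:, -1:]
        outputs = model(input_ids=step_input, use_cache=True, past_key_values=past_key_values)
        past_key_values = outputs.past_key_values
        # greedy pick of the next token from the last position's logits
        next_token = outputs.logits[:, -1, :].argmax(dim=-1, keepdim=True)
        generated = torch.cat([generated, next_token], dim=-1)
print(tokenizer.decode(generated[0]))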

######## Generate!
with torch.autocast(**amp_settings), torch.no_grad():
    # num_steps sets the number of recurrent steps used at inference time
    output_ids = model.generate(input_ids, max_new_tokens=20, use_cache=True, num_steps=32)

print(tokenizer.decode(output_ids[0]))

# generation works with or without the cache
with torch.autocast(**amp_settings), torch.no_grad():
    output_ids = model.generate(input_ids, max_new_tokens=20, use_cache=False, num_steps=32)

print(tokenizer.decode(output_ids[0]))

# Both are supposed to print:
# <|begin_text|>The capital of Westphalia is the city of Münster. The city is located in the north of the state and is
```
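
Note that `num_steps` is an argument of this checkpoint's custom modeling code, not of the standard `generate()` API; it sets how many recurrent steps the model unrolls per forward pass, so higher values spend more compute at inference time.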