From f7c1b27656904b0d050cf820ca8dfdc4aa12c051 Mon Sep 17 00:00:00 2001
From: Mahimai Raja J
Date: Tue, 12 Sep 2023 15:23:58 +0000
Subject: [PATCH] Add CUDA config to sample code

I tried to replicate the sample code on the free tiers of Kaggle, Colab, and SageMaker Studio Lab.

When running on a GPU:

```bash
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!
```

And when running on a CPU:

```bash
RuntimeError: "LayerNormKernelImpl" not implemented for 'Half'
```

Changes I made:

```diff
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("microsoft/phi-1_5", trust_remote_code=True, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5", trust_remote_code=True, torch_dtype="auto")

+ device = torch.device("cuda:0")
+ model.cuda()

inputs = tokenizer('''```python
def print_prime(n):
   """
   Print all primes between 1 and n
-   """''', return_tensors="pt", return_attention_mask=False)
+   """''', return_tensors="pt", return_attention_mask=False).to('cuda')

outputs = model.generate(**inputs, max_length=200)
text = tokenizer.batch_decode(outputs)[0]
print(text)
```

Moving the model and the tokenized inputs to CUDA resolved the runtime error on GPU. I believe this will help fellow developers try out phi-1_5 (a device-agnostic variant that also covers the CPU case is sketched after the diff). Thanks!
---
 README.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 843cd9d..7640174 100644
--- a/README.md
+++ b/README.md
@@ -104,11 +104,15 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 model = AutoModelForCausalLM.from_pretrained("microsoft/phi-1_5", trust_remote_code=True, torch_dtype="auto")
 tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5", trust_remote_code=True, torch_dtype="auto")
+
+device = torch.device("cuda:0")
+model.cuda()
+
 inputs = tokenizer('''```python
 def print_prime(n):
    """
    Print all primes between 1 and n
-   """''', return_tensors="pt", return_attention_mask=False)
+   """''', return_tensors="pt", return_attention_mask=False).to('cuda')
 outputs = model.generate(**inputs, max_length=200)
 text = tokenizer.batch_decode(outputs)[0]
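
The patch above hard-codes `cuda`, so the sample still fails on CPU-only machines, and the CPU run still hits the Half-precision LayerNorm error. Below is a device-agnostic sketch (just a sketch, untested on the free tiers above; it assumes only the standard `torch`/`transformers` APIs and is not part of this patch) that covers both cases:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Pick the device at runtime; fall back to CPU when no GPU is available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# torch_dtype="auto" loads phi-1_5 in fp16, but PyTorch has no fp16 (Half)
# LayerNorm kernel on CPU -- hence the "LayerNormKernelImpl" error.
# Load in float32 on CPU and keep the fp16 weights on GPU.
dtype = "auto" if device.type == "cuda" else torch.float32

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-1_5", trust_remote_code=True, torch_dtype=dtype
).to(device)
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5", trust_remote_code=True)

# Move the tokenized inputs to the same device as the model, which avoids
# the "Expected all tensors to be on the same device" error on GPU.
inputs = tokenizer('''```python
def print_prime(n):
   """
   Print all primes between 1 and n
   """''', return_tensors="pt", return_attention_mask=False).to(device)

outputs = model.generate(**inputs, max_length=200)
print(tokenizer.batch_decode(outputs)[0])
```

The float32 fallback trades memory for compatibility: it roughly doubles the model's RAM footprint on CPU, but LayerNorm (among other ops) simply has no Half kernel there.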