Practical 9: LLMs pre-training, prompting, & learning from human feedback¶
Dong Nguyen
Applied Text Mining - Utrecht Summer School
Settings¶
To run this notebook, use a GPU or TPU. In Google Colab, select the T4 GPU under 'Runtime' > 'Change runtime type'.
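If you want to check that a GPU is actually available before loading the model, here is a quick sketch (assuming PyTorch is installed, as it is on Colab):
import torch

# Check whether a CUDA GPU is visible; after switching to a T4 runtime
# this should print True and the device name.
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))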
We're going to use the Hugging Face Transformers library, a very popular Python library and platform for working with language models. See https://huggingface.co/docs/transformers/en/index for more information.
import os
os.environ["TRANSFORMERS_VERBOSITY"] = "error"
!pip install transformers
!pip install datasets==2.15.0
Phi-3-mini-4k-instruct¶
The code below loads a pre-trained LLM (Phi-3-mini-4k-instruct; 3.8B parameters).
Take a look at https://huggingface.co/microsoft/Phi-3-mini-4k-instruct to read more about Phi-3-mini-4k-instruct.
Tip: Run the code below (which can take a few to ten minutes) and read the webpage while you wait.
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map="cuda",    # store the model on GPU
    torch_dtype="auto",   # automatically determines the best data type
    trust_remote_code=False,
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
from transformers import pipeline
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    max_new_tokens=500,
    do_sample=False
)
Let's prompt the model:
messages = [
    {"role": "user", "content": "Where is Utrecht?"}
]
output = generator(messages)
print(output)
Experiment with the following:

- return_full_text controls whether the input prompt is returned as well. Experiment with True and False.
- max_new_tokens sets the maximum number of tokens to generate. Experiment with different values.
- Different prompts. Experiment with both factual and more subjective questions.
- Deterministic generation (do_sample=False) versus non-deterministic generation (do_sample=True). When you sample, you can also set the temperature parameter; try out different values (see the sketch after this list).
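For example, here is a minimal sketch for the last point. The sampling_generator name and the temperature value are just for illustration; with sampling, the exact outputs will vary between runs.
# Sketch: a sampling-based generator to compare against the deterministic one above.
# Higher temperature generally means more diverse/random outputs.
sampling_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
    max_new_tokens=100,
    do_sample=True,     # enable sampling
    temperature=1.0,    # try e.g. 0.2, 0.7, 1.0, 1.5
)
messages = [
    {"role": "user", "content": "Describe Utrecht in one sentence."}
]
# Run the same prompt twice; with sampling the two outputs will usually differ.
for _ in range(2):
    print(sampling_generator(messages)[0]['generated_text'])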
System message¶
With the system message we can set the overall behavior of the model.
messages = [
    {"role": "system", "content": "Respond as if you're a 15-year-old girl named Lisa, who loves thrillers."},
    {"role": "user", "content": "What is your favorite movie?"}
]
print(generator(messages)[0]['generated_text'])
messages = [
    {"role": "system", "content": "You're a 50-year-old man named Dave, who has a dry sense of humor and loves sci-fi movies."},
    {"role": "user", "content": "What is your favorite movie?"}
]
print(generator(messages)[0]['generated_text'])
messages = [
    {"role": "system", "content": "You are a high school teacher."},
    {"role": "user", "content": "Explain photosynthesis to a 13-year-old."}
]
print(generator(messages)[0]['generated_text'])
Experiment with the following:

- Different prompts and system messages, to simulate certain personas or to steer the behavior of the model (one more sketch is given below).
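One possible variation, as a sketch (this particular system message and question are not part of the practical, just an example of steering the output style rather than simulating a persona):
# Steer the style of the answer: force a very short response.
messages = [
    {"role": "system", "content": "You are a travel guide. Answer in exactly one short sentence."},
    {"role": "user", "content": "What should I visit in Utrecht?"}
]
print(generator(messages)[0]['generated_text'])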
Simulate a chat history¶
We can pass in a list of system, user, and assistant messages to simulate a longer conversation history.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who wrote 'Pride and Prejudice'?"},
    {"role": "assistant", "content": "Jane Austen wrote 'Pride and Prejudice'."},
    {"role": "user", "content": "What else did she write?"}
]
print(generator(messages)[0]['generated_text'])
Exercise: Experiment with a few more examples where context can make a difference (one sketch is given below).
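For instance, in the sketch below the final question only makes sense given the earlier turns; try asking it with and without the preceding messages.
# The last question is ambiguous on its own; the history resolves what "there" refers to.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "I'm visiting Utrecht next weekend."},
    {"role": "assistant", "content": "Nice! Utrecht is a beautiful city with canals and the Dom Tower."},
    {"role": "user", "content": "Which museums can I visit there?"}
]
print(generator(messages)[0]['generated_text'])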
Classification¶
We're going to experiment with sentiment classification and load the SST-2 dataset, which contains sentences from movie reviews (0 = negative, 1 = positive).
from datasets import load_dataset
# Load a sentiment dataset, only the first 10 instances
dataset = load_dataset("glue", "sst2", split="validation[:10]")
# Pipeline for zero-shot prompting
classification_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=50,
    do_sample=False,
    return_full_text=False
)
Print the first two instances
dataset[:2]
# Format and run examples
for example in dataset:
    text = example["sentence"]
    prompt = f"""### Instruction:
Is the sentence below Positive or Negative? Only answer with Positive or Negative.
### Text:
"{text}"
### Sentiment:"""
    messages = [
        {"role": "user", "content": prompt}
    ]
    output = classification_generator(messages)[0]['generated_text']
    print(f"Text: {text}")
    print(f"Predicted Sentiment: {output}")
    print("---" * 10)
Exercise: Experiment with different prompts; for example, you can ask for an explanation.
Tokenizer¶
To get a sense of the tokenizer used, you can print the tokens
tokens = tokenizer("Where is Utrecht?")
print(tokens)
print(tokenizer.convert_ids_to_tokens(tokens['input_ids']))
Exercise: Experiment with uncommon words, misspelled words, dialect words, or words that don't exist (a small sketch follows the examples below).
For example:
- I like this so muhc vs. I like this so much
- This is so coooooool
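As a starting point, here is a small sketch that tokenizes the examples above so you can compare how they are split into tokens:
# Compare tokenization of a misspelling, the correct spelling, and an elongated word.
for sentence in ["I like this so muhc", "I like this so much", "This is so coooooool"]:
    ids = tokenizer(sentence)["input_ids"]
    print(sentence)
    print(tokenizer.convert_ids_to_tokens(ids))
    print()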
Print a subset of the tokens in the vocabulary
vocab = tokenizer.get_vocab()
# Sort the vocabulary by token ID to get the "first" tokens
sorted_vocab = sorted(vocab.items(), key=lambda item: item[1])
# Print some tokens
for token, token_id in sorted_vocab[1000:1050]:
    print(f"{token_id:>3}: {token}")
If you have the time: experiment with another model¶
You can experiment with the HuggingFaceTB/SmolLM3-3B model, which was released very recently: https://huggingface.co/HuggingFaceTB/SmolLM3-3B.
Note that extended thinking is enabled by default, which means the output includes a reasoning trace.
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gc
# If you run into an out-of-memory error, you can either restart the notebook
# and load only this model, or explicitly delete the previous model from memory:
# del model
# del tokenizer
# del generator
# gc.collect()
# torch.cuda.empty_cache()
# print(torch.cuda.memory_allocated())
# Load model and tokenizer
smol_model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceTB/SmolLM3-3B",
    device_map="cuda",    # store the model on GPU
    torch_dtype="auto",   # automatically determines the best data type
    trust_remote_code=False,
)
smol_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM3-3B")
smol_generator = pipeline(
    "text-generation",
    model=smol_model,
    tokenizer=smol_tokenizer,
    return_full_text=False,
    max_new_tokens=500,
    do_sample=True,  # apply sampling
)
messages = [
    {"role": "user", "content": "How are you?"}
]
print(smol_generator(messages))
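To compare the output with and without the reasoning trace, the model card linked above describes a flag for disabling extended thinking; the sketch below assumes that putting /no_think in the system message does this (check the model card if it behaves differently for your version).
# Assumption based on the model card: a "/no_think" system message
# should disable extended thinking (no reasoning trace in the output).
messages = [
    {"role": "system", "content": "/no_think"},
    {"role": "user", "content": "How are you?"}
]
print(smol_generator(messages)[0]['generated_text'])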