Pico V1

Pico v1 is a work-in-progress model. It is based on Phi 3.5 Mini (3.8B parameters) and has been fine-tuned for automatic chain-of-thought (CoT) reasoning and self-reflection.

When generating an output, Pico produces three sections: a reasoning section, a self-reflection section, and an output section.
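
If you want to handle the three sections separately (for example, to show end users only the final answer), you can split the generated text on the section markers. This is a minimal sketch; the marker strings below ("Reasoning:", "Reflection:", "Output:") are placeholders, so inspect Pico's actual output and adjust them to whatever delimiters it emits.

import re

# Hypothetical section markers -- check Pico's real output and adjust.
SECTION_MARKERS = ["Reasoning:", "Reflection:", "Output:"]

def split_sections(text):
    """Split a Pico reply into its reasoning / self-reflection / output parts."""
    pattern = "(" + "|".join(re.escape(m) for m in SECTION_MARKERS) + ")"
    parts = re.split(pattern, text)
    sections = {}
    current = None
    for piece in parts:
        if piece in SECTION_MARKERS:
            current = piece.rstrip(":").lower()
            sections[current] = ""
        elif current is not None:
            sections[current] += piece.strip()
    return sections

# Example: print only the final answer section, falling back to the full reply.
reply = "Reasoning: 2+2 is basic arithmetic. Reflection: The sum checks out. Output: 4"
print(split_sections(reply).get("output", reply))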

Pico v1 struggles with tasks that are not question-oriented (small talk, roleplay, etc.).

Here is an example of how you can use it:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Phi 3.5 chat template (Jinja), shown for reference; build_prompt below mirrors it by hand.
phi3_template = (
    "{{ bos_token }}"
    "{% for message in messages %}"
        "{{ '<|' + message['role'] + '|>\\n' + message['content'] + '<|end|>\\n' }}"
    "{% endfor %}"
    "{% if add_generation_prompt %}"
        "{{ '<|assistant|>\\n' }}"
    "{% endif %}"
)
phi3_template_eos_token = "<|end|>"
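# For illustration: assuming bos_token is "<s>" (Phi 3.5 Mini's BOS token -- verify
# against the tokenizer), a single user turn rendered with this template looks like:
#   <s><|user|>
#   Hello!<|end|>
#   <|assistant|>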

def build_prompt(messages, bos_token="<|start|>", add_generation_prompt=True):
    """
    Build a prompt using the PHI 3.5 template.
    """
    prompt = bos_token
    for message in messages:
        prompt += f"<|{message['role']}|>\n{message['content']}\n<|end|>\n"
    if add_generation_prompt:
        prompt += "<|assistant|>\n"
    return prompt

def chat_with_model():
    # Load the model and tokenizer
    model_name = "LucidityAI/Pico-v1-3b"
    print("Loading model and tokenizer...")

    # Enforce GPU usage
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available. Please ensure your GPU and CUDA environment are configured correctly.")

    device = torch.device("cuda")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)
    print("Model and tokenizer loaded successfully.")

    # Chat loop
    print("Start chatting with the model! Type 'exit' to quit.")
    conversation = []
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Goodbye!")
            break

        # Append user's message to the conversation
        conversation.append({"role": "user", "content": user_input})

        # Build the input prompt using the PHI 3.5 template
        prompt = build_prompt(conversation, bos_token=tokenizer.bos_token or "<|start|>")

        # Tokenize the input prompt
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device)

        # Generate a response
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=512,           # cap new tokens so long prompts still leave room to respond
            num_return_sequences=1,
            do_sample=True,               # required for temperature to take effect
            temperature=0.5,
            pad_token_id=tokenizer.eos_token_id
        )

        # Decode only the newly generated tokens (everything after the prompt)
        generated_tokens = outputs[0][inputs.input_ids.shape[1]:]
        assistant_reply = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
        print(f"Model: {assistant_reply}")

        # Append the assistant's reply to the conversation
        conversation.append({"role": "assistant", "content": assistant_reply})

if __name__ == "__main__":
    chat_with_model()
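
The build_prompt helper above formats the conversation by hand. If the LucidityAI/Pico-v1-3b tokenizer ships with a chat template (the base Phi 3.5 Mini tokenizer does, but treat this as an assumption and verify), you can let transformers render the prompt for you with apply_chat_template. A minimal sketch:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("LucidityAI/Pico-v1-3b")
conversation = [{"role": "user", "content": "What is 17 * 12?"}]

# Render the conversation with the tokenizer's built-in chat template.
prompt = tokenizer.apply_chat_template(
    conversation,
    tokenize=False,               # return a string rather than token IDs
    add_generation_prompt=True,   # append the assistant turn marker
)
print(prompt)

The resulting string can be tokenized and passed to model.generate exactly as in the chat loop above.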