Models
Register the model that you want to evaluate, test or collect datapoints from. Models must be uniquely named within a project namespace.
The first time a model is defined, the attributes of the model are persisted. Subsequent calls to `register_model` will return the persisted model; they will not update the definition.
For custom models, subsequent calls to `register_model` will need the `CustomModel` class to be initialized again.
Custom Models
Okareo can work with any model through the `CustomModel` class. To use this, create a class that inherits from the abstract `CustomModel` class and implements the `invoke` method.
See the various ways it can be implemented for classification, generation, embedding, and retrieval below.
- Classification
- Generation
- Embed
- Embed and Vector DB
from okareo import Okareo
from okareo.model_under_test import CustomModel
class CustomClassificationModel(CustomModel):
    """Toy spam classifier showing how to wrap a classifier in ``CustomModel``."""

    # Define the invoke method to be called on each input of a scenario
    def invoke(self, input: str) -> tuple:
        """Classify a single scenario input as "spam" or "not spam".

        Args:
            input: The scenario input text to classify.

        Returns:
            A tuple of (model result, overall model response context).
        """
        # call your model being tested using <input> from the scenario set
        if "download now!" in input:
            result = "spam"
        else:
            result = "not spam"
        # return a tuple of (model result, overall model response context)
        return result, {"response": "Classification successful"}
# Register the custom classifier under the project-unique name "intent_classifier".
# NOTE(review): assumes `okareo` is an initialized Okareo client created earlier;
# it is not defined in this snippet.
model_under_test = okareo.register_model(
    name="intent_classifier",
    model=CustomClassificationModel(name="Custom Classification model")
)
from okareo import Okareo
from okareo.model_under_test import CustomModel
from transformers import pipeline
class CustomGenerationModel(CustomModel):
    """Summarization model wrapped as a ``CustomModel`` for generation evaluations."""

    def __init__(self, *args, **kwargs):
        """Initialize the base model and load the summarization pipeline.

        Constructor arguments (e.g. ``name="..."``) are forwarded to
        ``CustomModel`` so the class can be created the same way as the other
        custom models: ``CustomGenerationModel(name="Custom Generation model")``.
        """
        super().__init__(*args, **kwargs)
        # Load the pipeline once here so it is reused across invoke() calls
        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

    # Define the invoke method to be called on each input of a scenario
    def invoke(self, input: str) -> tuple:
        """Summarize a single scenario input.

        Args:
            input: The scenario input text to summarize.

        Returns:
            A tuple of (model result, overall model response context).
        """
        # call your model being tested using <input> from the scenario set
        summaries = self.summarizer(input, max_length=130, min_length=30, do_sample=False)
        result = summaries[0]["summary_text"]
        # return a tuple of (model result, overall model response context)
        return result, {"model_response": "Generation successful"}
# Register the custom generation model under the project-unique name "generation".
# NOTE(review): assumes `okareo` is an initialized Okareo client created earlier;
# it is not defined in this snippet.
model_under_test = okareo.register_model(
    name="generation",
    model=CustomGenerationModel(name="Custom Generation model")
)
from okareo import Okareo
from okareo.model_under_test import CustomModel
class CustomEmbeddingModel(CustomModel):
    """Skeleton embedding model showing the expected ``invoke`` return shape."""

    # Define the invoke method to be called on each input of a scenario
    def invoke(self, input: str) -> tuple:
        """Embed a single scenario input.

        Args:
            input: The scenario input text to embed.

        Returns:
            A tuple of (embedding vector, overall model response context).
        """
        # call your model being tested using <input> from the scenario set
        # result should be a vector
        # NOTE: the trailing `...` is a placeholder; replace this list with the
        # embedding produced by your model
        result = [1.2, 1.4, ...]
        # return a tuple of (model result, overall model response context)
        return result, {"model_response": "Embedding successful"}
# Register the embedding model together with a vector database as one model "embed".
# NOTE(review): assumes `okareo` is an initialized Okareo client created earlier;
# it is not defined in this snippet.
model_under_test = okareo.register_model(
    name="embed",
    model=[
        CustomEmbeddingModel(name="Custom Embedding model"),
        # Placeholder: replace with your vector database model instance
        YourVectorDB()
    ]
)
The `invoke` method will need to handle the process of embedding the input, querying the vector store, and returning the results. See an example below using ChromaDB.
from okareo import Okareo
from okareo.model_under_test import CustomModel
def query_results_to_score(results):
    """Turn ChromaDB's query results into scored documents for the evaluation.

    Args:
        results: A ChromaDB query result for a single query text. Only the
            first entry of its 'ids', 'distances' and 'metadatas' lists is read,
            and each metadata dict must contain an 'article_type' key.

    Returns:
        A list with one dictionary per returned document:
          - "id": the ID of the returned document
          - "score": the similarity between the query and the document
            (a higher score is more similar)
          - "metadata": optional dictionary of information about the document
          - "label": optional string label for the document
    """
    ids = results['ids'][0]
    distances = results['distances'][0]
    metadatas = results['metadatas'][0]

    parsed_ids_with_scores = []
    # iterate through the parallel per-document lists of the query results
    for doc_id, distance, metadata in zip(ids, distances, metadatas):
        # this turns cosine distance into a cosine similarity score
        score = (2 - distance) / 2
        parsed_ids_with_scores.append(
            {
                "id": doc_id,
                "score": score,
                "metadata": metadata,
                "label": f"{metadata['article_type']} WebBizz Article w/ ID: {doc_id}",
            }
        )
    return parsed_ids_with_scores
class CustomRetrievalModel(CustomModel):
    """Retrieval model that queries a ChromaDB collection for each scenario input."""

    # Define the invoke method to be called on each input of a scenario
    def invoke(self, input: str) -> tuple:
        """Query the vector store with one scenario input.

        Args:
            input: The scenario input text used as the query.

        Returns:
            A tuple of (model result, overall model response context). The model
            result is a list of dictionaries with the id, score, metadata, and
            label of each returned document (metadata and label are optional).
        """
        # call your model being tested using <input> from the scenario set
        # in this case, chromaDB will embed the input and query the vector store
        # NOTE: `collection` is the ChromaDB collection; it must be created
        # before this model is invoked
        results = collection.query(
            query_texts=[input],
            # Number of results to be returned
            n_results=5
        )
        # return a tuple of (model result, overall model response context)
        # model result should be a list of dictionaries with the id, score, metadata, and label
        # note that metadata and label fields are optional
        return query_results_to_score(results), {'model_data': input}
# Register the custom retrieval model under the name "Example retrieval model".
# NOTE(review): assumes `okareo` is an initialized Okareo client created earlier;
# it is not defined in this snippet.
model_under_test = okareo.register_model(
    name="Example retrieval model",
    model=CustomRetrievalModel(name="Custom Retrieval model")
)
Classification Models
Okareo can work with any classification model through our custom model class. We additionally have built-in support for any of OpenAI's GPT models and Cohere's classification models.
- OpenAI
- Cohere
To use an OpenAI model, find your OpenAI model id here. To understand the temperature parameter, you can learn about model temperature.
from okareo import Okareo
from okareo.model_under_test import OpenAIModel
# Register an OpenAI model configured as a spam classifier.
# NOTE(review): assumes `okareo` is an initialized Okareo client created earlier;
# it is not defined in this snippet.
okareo.register_model(
    name="OpenAI Classifier Model",
    model=OpenAIModel(
        # add the model id from OpenAI, this can be found on their docs linked above
        model_id="your-model-here",
        # add the temperature you want for the model
        temperature=0,
        # This prompt template is sent as a system message to the language model
        system_prompt_template="Classify whether or not the email is spam. Output True for spam and False for not spam ...",
        # User prompt is sent as the user request to the language model.
        # To use the scenario input in the user prompt,
        # the user prompt should have "{input}" as part of the string
        user_prompt_template="The user email: {input}",
    ),
)
You can use a fine-tuned Cohere model by supplying its full ID in the `model_id` parameter.
from okareo import Okareo
from okareo.model_under_test import CohereModel
# Register a Cohere classification model.
# NOTE(review): assumes `okareo` is an initialized Okareo client created earlier;
# it is not defined in this snippet.
okareo.register_model(
    name="Cohere Classifier Model",
    model=CohereModel(
        # "classify" selects Cohere's classification model type
        model_type="classify",
        # Add your fine-tuned model id
        model_id="your-model-here",
    ),
)
Generation Models
Okareo can work with any generative model through our custom model class. We additionally have built-in support for any GPT models from OpenAI.
- OpenAI
To use an OpenAI model, find your OpenAI model id here. To understand the temperature parameter, you can learn about model temperature.
from okareo import Okareo
from okareo.model_under_test import OpenAIModel
# Register an OpenAI model configured to rephrase the scenario input.
# NOTE(review): assumes `okareo` is an initialized Okareo client created earlier;
# it is not defined in this snippet.
okareo.register_model(
    name="OpenAI Generation Model",
    model=OpenAIModel(
        # add the model id from OpenAI, this can be found on their docs linked above
        model_id="your-model-here",
        # add the temperature you want for the model
        temperature=0,
        # This prompt template is sent as a system message to the language model
        system_prompt_template="Rephrase this piece of text ...",
        # User prompt is sent as the user request to the language model.
        # To use the scenario input in the user prompt,
        # the user prompt should have "{input}" as part of the string
        user_prompt_template="Here is the text to rephrase: {input}"
    ),
)
Embedding Models
Okareo can work with any embedding model through our custom model class. We additionally have built-in support for Cohere embedding models.
- Cohere
To find a model ID, see the list of the Cohere embedding models here. For example, the model ID could be `embed-english-v3.0`.
from okareo import Okareo
from okareo.model_under_test import CohereModel
# Register a Cohere embedding model together with a vector database.
# NOTE(review): assumes `okareo` is an initialized Okareo client created earlier;
# it is not defined in this snippet.
okareo.register_model(
    name="Cohere embedding Model",
    model=[
        CohereModel(
            # "embed" selects Cohere's embedding model type
            model_type="embed",
            # Add your model id here
            model_id="your-model-here"
        ),
        # Placeholder: replace with your vector database model instance
        YourVectorDB()
    ]
)
Vector Databases
Okareo can work with any vector database through our custom model class. We additionally have built-in support for Qdrant and Pinecone.
- Qdrant
- Pinecone
We have built-in support for connecting to a hosted Qdrant instance.
from okareo import Okareo
from okareo.model_under_test import QdrantDB
# Register a retrieval model that pairs an embedding model with a hosted Qdrant instance.
# NOTE(review): assumes `okareo` is an initialized Okareo client created earlier;
# it is not defined in this snippet.
okareo.register_model(
    name="Your retrieval model",
    model=[
        # Placeholder: replace with your embedding model instance
        YourEmbeddingModel(),
        QdrantDB(
            # Your Qdrant instance url
            url="...qdrant.io:port",
            # Name of the collection within your Qdrant instance
            collection_name="your collection name",
            # How many top results should be returned from the vector search
            top_k=10,
        )
    ]
)
Instructions to find the following Pinecone parameters can be found here.
from okareo import Okareo
from okareo.model_under_test import PineconeDb
# Register a retrieval model that pairs an embedding model with a Pinecone index.
# NOTE(review): assumes `okareo` is an initialized Okareo client created earlier;
# it is not defined in this snippet.
okareo.register_model(
    name="Your retrieval model",
    model=[
        # Placeholder: replace with your embedding model instance
        YourEmbeddingModel(),
        PineconeDb(
            # Name of the index within your Pinecone account
            index_name="your-index-name",
            # The environment/region your Pinecone collection is hosted in
            region="gcp-starter",
            # ID of the project that the collection is in
            # NOTE(review): the original comment here was cut off ("This should be ...");
            # confirm the expected project ID value against the Pinecone instructions
            project_id="<your-project-id>",
            # How many top results should be returned from the vector search
            top_k=10,
        )
    ]
)