Module src.llm_api.llama_chat_assistant
from openai import BadRequestError, OpenAI
from openai.types.chat import ChatCompletion, ChatCompletionMessageParam

from llm_api.iassistant import IAssistant
from llm_api.utils import (
    together,
    generate_together_completion,
    get_together_text,
    conversation_prompt_to_instruct,
)

MODEL_NAME = "togethercomputer/llama-2-70b-chat"
FALLBACK_MODEL_NAME = "togethercomputer/Llama-2-7B-32K-Instruct"
# LLaMA 2 with a longer context window: https://www.together.ai/blog/llama-2-7b-32k
# MODEL_NAME = FALLBACK_MODEL_NAME


def _get_response(response: ChatCompletion) -> str:
    """
    Extract the text content from a chat completion response.
    """
    if not response.choices or not response.choices[0].message.content:
        print(response)
        raise ValueError("Invalid response or text not found")
    return response.choices[0].message.content


class LLaMA2ChatAssistant(IAssistant):
    def __init__(self, api_key: str) -> None:
        # Together AI exposes an OpenAI-compatible endpoint.
        self.client = OpenAI(
            api_key=api_key,
            base_url="https://api.together.xyz",
        )
        together.api_key = api_key

    def generate_response(
        self,
        conversation: list[ChatCompletionMessageParam],
        temperature: float = 1.0,
        top_p: float = 1.0,
    ) -> str:
        """
        Generate a response from the conversation history.

        Parameters:
        - conversation (list[dict]): A list of message objects representing
          the conversation history; it may contain multiple messages from
          the user and/or the system.

        Returns:
            str: The generated response from the LLM.
        """
        try:
            response = self.client.chat.completions.create(
                model=MODEL_NAME,
                messages=conversation,
                temperature=temperature,
                top_p=top_p,
            )
        except BadRequestError:
            # The request was rejected (typically an over-long conversation):
            # retry with a model that has a larger context window.
            print(
                "Error generating response with the main model, trying fallback model"
            )
            return get_together_text(
                generate_together_completion(
                    prompt=conversation_prompt_to_instruct(conversation),
                    model=FALLBACK_MODEL_NAME,
                    temperature=temperature,
                    top_p=top_p,
                )
            )
        return _get_response(response)

    def get_conversation_tokens(
        self, conversation: list[ChatCompletionMessageParam]
    ) -> int:
        """
        Count the tokens in a conversation.
        """
        raise NotImplementedError("Not implemented yet")
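A minimal usage sketch (the API key is a placeholder, and this assumes the Together-hosted model names above are still served):

from llm_api.llama_chat_assistant import LLaMA2ChatAssistant

assistant = LLaMA2ChatAssistant(api_key="YOUR_TOGETHER_API_KEY")
reply = assistant.generate_response(
    conversation=[
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Explain top_p sampling in one sentence."},
    ],
    temperature=0.7,
    top_p=0.9,
)
print(reply)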
Classes
class LLaMA2ChatAssistant (api_key: str)
-
Interface for a Large Language Model (LLM). It processes conversation contexts to generate responses.
Ancestors
- llm_api.iassistant.IAssistant
- abc.ABC
Methods
def generate_response(self, conversation: list[ChatCompletionMessageParam], temperature: float = 1.0, top_p: float = 1.0) -> str
-
Generate a response from the conversation history.
Parameters:
- conversation (list[dict]): A list of message objects representing the conversation history; it may contain multiple messages from the user and/or the system.
Returns:
str: The generated response from the LLM.
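When chat.completions.create raises BadRequestError (typically because the conversation exceeds the main model's context window), the messages are flattened into a single instruct-style prompt by llm_api.utils.conversation_prompt_to_instruct and resent to the 32K-context fallback model. The sketch below only illustrates the flattening idea; the template the real helper uses may differ.

# Illustrative only -- the actual conversation_prompt_to_instruct in
# llm_api.utils may use a different prompt template.
def flatten_conversation(conversation: list[dict]) -> str:
    parts = []
    for message in conversation:
        # Prefix each turn with its role so the instruct model keeps context.
        parts.append(f"{message['role']}: {message['content']}")
    parts.append("assistant:")  # cue the model to answer as the assistant
    return "\n".join(parts)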
def get_conversation_tokens(self, conversation: list[ChatCompletionMessageParam]) -> int
-
Count the tokens in a conversation.
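This method is not implemented yet. As a stopgap, a rough, purely illustrative sketch of client-side counting, assuming a LLaMA-2 tokenizer from Hugging Face (gated; any LLaMA-2 size shares the same tokenizer) approximates Together's server-side tokenization; the helper name is hypothetical:

# Rough approximation only; Together's server-side count may differ.
from transformers import AutoTokenizer

def count_conversation_tokens(conversation: list[dict]) -> int:
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-70b-chat-hf")
    # Join message contents; real chat templating would add special tokens.
    text = "\n".join(message["content"] for message in conversation)
    return len(tokenizer.encode(text))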