import concurrent.futures
import json
import logging
from typing import Union

from llama_cpp import Llama
from openai import OpenAI


class BaseChat:
    def __init__(self, config: dict) -> None:
        self.config = config

    def prepare_function_calling(self, tools):
        """Split tool specs into a callable lookup and API-ready tool definitions."""
        function_map = {}
        if tools is not None and len(tools) > 0:
            for tool in tools:
                function_name = tool["function"]["name"]
                function_map[function_name] = tool["function"]["function_to_call"]
            functions = []
            for tool in tools:
                fn = tool["function"]
                # Strip the local callable before sending the spec to the API.
                functions.append(
                    {"type": tool["type"], "function": {x: fn[x] for x in fn if x != "function_to_call"}}
                )
            logging.info(f"{len(tools)} available functions:")
            logging.info(function_map.keys())
        else:
            functions = None
        return function_map, functions


class OpenAIChat(BaseChat):
    def __init__(self, config: dict) -> None:
        self.config = config
        if self.config["openai"]["base_url"] is not None and self.config["openai"]["base_url"] != "":
            base_url = self.config["openai"]["base_url"]
        else:
            base_url = None
        self.client = OpenAI(base_url=base_url, api_key=self.config["openai"]["api_key"])

    def chat(self, messages, tools) -> str:
        function_map, functions = self.prepare_function_calling(tools)
        logging.debug("Sending request to OpenAI.")
        # Only pass the tool parameters when tools were actually provided;
        # sending explicit nulls can be rejected by the API.
        tool_kwargs = {"tools": functions, "tool_choice": "auto"} if functions is not None else {}
        llm_response = self.client.chat.completions.create(
            model=self.config["openai"]["chat_model"],
            messages=messages,
            temperature=self.config["temperature"],
            **tool_kwargs,
        )
        logging.debug("LLM response:")
        logging.debug(llm_response.choices)
        if llm_response.choices[0].message.tool_calls:
            # Handle function calls
            followup_response = self.execute_function_call(
                llm_response.choices[0].message, function_map, messages
            )
            return followup_response.choices[0].message.content.strip()
        else:
            return llm_response.choices[0].message.content.strip()

    def execute_function_call(self, message, function_map: dict, messages: list):
        """
        Executes function calls embedded in an LLM message in parallel,
        and returns an LLM response based on the results.

        Parameters:
        - message: LLM message containing the function calls.
        - function_map (dict): dict of {"function_name": function()}
        - messages (list): message history
        """
        tool_calls = message.tool_calls
        logging.info(f"Got {len(tool_calls)} function call(s).")
        # The assistant message that requested the tool calls must precede
        # the tool results in the history, or the follow-up request fails.
        messages.append(message)

        def execute_single_tool_call(tool_call):
            """Helper function to execute a single tool call."""
            logging.info(
                f"Attempting to execute function requested by LLM "
                f"({tool_call.function.name}, {tool_call.function.arguments})."
            )
            if tool_call.function.name in function_map:
                function_to_call = function_map[tool_call.function.name]
                args = json.loads(tool_call.function.arguments)
                logging.debug(f"Calling function {tool_call.function.name} with args: {args}")
                function_response = function_to_call(**args)
                logging.debug(function_response)
                return {
                    # Responses to tool_calls must use the "tool" role and
                    # carry string content.
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "name": tool_call.function.name,
                    "content": str(function_response),
                }
            else:
                logging.info(f"{tool_call.function.name} not in function_map")
                logging.info(function_map.keys())
                return None

        # Execute tool calls in parallel
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Submit all tool calls to the executor
            future_to_tool_call = {
                executor.submit(execute_single_tool_call, tool_call): tool_call
                for tool_call in tool_calls
            }
            # Collect results as they complete
            for future in concurrent.futures.as_completed(future_to_tool_call):
                result = future.result()
                if result is not None:
                    messages.append(result)

        logging.debug("Functions called, sending the results to LLM.")
        llm_response = self.client.chat.completions.create(
            model=self.config["openai"]["chat_model"],
            messages=messages,
            temperature=self.config["temperature"],
        )
        logging.debug("Got response from LLM:")
        logging.debug(llm_response)
        return llm_response
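# Illustrative sketch, not exercised by the module: the message history that
# OpenAIChat.execute_function_call builds for a single tool call. The id and
# content values here are hypothetical.
#
#   messages = [
#       {"role": "user", "content": "What's the weather in Paris?"},
#       <assistant message carrying tool_calls>,   # appended by execute_function_call
#       {"role": "tool",                           # one entry per executed tool call
#        "tool_call_id": "call_abc123",
#        "name": "get_current_weather",
#        "content": "Sunny and 22C"},
#   ]
#
# The follow-up completion request is sent with this full history so the model
# can phrase a final answer from the tool results.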
class LMStudioChat(BaseChat):
    def __init__(self, config: dict) -> None:
        self.config = config
        self.client = OpenAI(base_url="http://localhost:1234/v1", api_key="not-needed")

    def chat(self, messages, tools) -> str:
        if tools is not None:
            logging.warning("Tools were provided to LMStudioChat, but tool calling is not yet supported.")
        logging.info("Sending request to local LLM.")
        llm_response = self.client.chat.completions.create(
            model="",
            messages=messages,
            temperature=self.config["temperature"],
        )
        return llm_response.choices[0].message.content.strip()


class LlamaChat(BaseChat):
    def __init__(self, config: dict) -> None:
        self.config = config
        self.client = Llama(
            self.config["local_model_dir"]
            + "TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q6_K.gguf",
            n_gpu_layers=32,
            n_ctx=2048,
            verbose=False,
        )

    def chat(self, messages: list, response_format: dict = None, tools: list = None) -> str:
        if tools is not None:
            logging.warning("Tools were provided to LlamaChat, but tool calling is not yet supported.")
        logging.info("Sending request to local LLM.")
        logging.info(messages)
        llm_response = self.client.create_chat_completion(
            messages=messages,
            response_format=response_format,
            temperature=self.config["temperature"],
        )
        return llm_response["choices"][0]["message"]["content"].strip()
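# Illustrative sketch, assuming a recent llama-cpp-python build: LlamaChat
# forwards response_format straight to create_chat_completion, which accepts
# the OpenAI-style JSON mode value. The config dict below is hypothetical but
# matches the keys LlamaChat reads.
#
#   llama = LlamaChat({"local_model_dir": "/models/", "temperature": 0.2})
#   reply = llama.chat(
#       messages=[{"role": "user", "content": "List three colors as JSON."}],
#       response_format={"type": "json_object"},
#   )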
class LLM:
    def __init__(self, config: dict) -> None:
        """
        LLM constructor.

        Parameters:
        - config (dict): llm config parsed from the llm part of config.yml
        """
        self.config = config
        self.chat_client = OpenAIChat(self.config)

    def query(
        self,
        user_msg: str,
        system_msg: Union[None, str] = None,
        history: Union[None, list] = None,
        tools: Union[None, list] = None,
    ):
        """
        Query the LLM.

        Parameters:
        - user_msg (str): query from the user (appended to messages as {"role": "user"})
        - system_msg (str): optional system prompt (prepended to messages as {"role": "system"})
        - history (list): optional list of messages to inject between system_msg and user_msg
        - tools (list): optional list of functions that may be called by the LLM. Example:
            {
                "type": "function",
                "function": {
                    "name": "get_current_weather",
                    "function_to_call": get_current_weather,
                    "description": "Get the current weather",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "location": {
                                "type": "string",
                                "description": "The city and state, e.g. San Francisco, CA",
                            }
                        },
                        "required": ["location"],
                    },
                },
            }
        """
        messages = []
        if system_msg is not None:
            messages.append({"role": "system", "content": system_msg})
        if history:
            logging.info("History")
            logging.info(history)
            messages += history
        messages.append({"role": "user", "content": user_msg})
        logging.info(f"Sending request to LLM with {len(messages)} messages.")
        answer = self.chat_client.chat(messages=messages, tools=tools)
        return answer
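if __name__ == "__main__":
    # Minimal smoke test, a sketch only: the API key and model name are
    # placeholders, and get_current_weather is a hypothetical stand-in for a
    # real lookup. It mirrors the tool-spec format documented in LLM.query.
    logging.basicConfig(level=logging.INFO)

    def get_current_weather(location: str) -> str:
        # Hypothetical implementation; a real tool would query a weather API.
        return f"Sunny and 22C in {location}"

    config = {
        "openai": {"base_url": "", "api_key": "sk-...", "chat_model": "gpt-4o-mini"},
        "temperature": 0.2,
    }
    llm = LLM(config)
    print(
        llm.query(
            user_msg="What's the weather in San Francisco, CA?",
            system_msg="You are a helpful assistant.",
            tools=[
                {
                    "type": "function",
                    "function": {
                        "name": "get_current_weather",
                        "function_to_call": get_current_weather,
                        "description": "Get the current weather",
                        "parameters": {
                            "type": "object",
                            "properties": {
                                "location": {
                                    "type": "string",
                                    "description": "The city and state, e.g. San Francisco, CA",
                                }
                            },
                            "required": ["location"],
                        },
                    },
                }
            ],
        )
    )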