Module src.core.analysis
Expand source code
import json
import csv
from datetime import datetime
from typing import Union
from core.conversation import Conversation
from core.utils import Colors, print_message
from llm_api.iassistant import IAssistant
from models.models import ConversationRolesInternalEnum, Message
from prompt_manager.ipromptmanager import IPromptManager
from runtime.iruntime import IRuntime
# TODO: Rewrite the cell in case of error
class CodeRetryLimitExceeded(Exception):
    """
    Signal that code execution kept failing after the allowed number of retries.

    The text given as ``message`` is stored on the instance as ``message`` and
    is also handed to ``Exception`` so that ``str(exc)`` yields the same text.
    """

    def __init__(self, message="Exceeded code retry limit"):
        # Initialise the base exception first, then expose the text as an attribute.
        super().__init__(message)
        self.message = message
def save_to_csv(data: list) -> None:
    """
    Append one row of analysis results to ``results.csv``.

    The file lives in the current working directory, uses ``;`` as the
    delimiter and is created when missing. The header row is written first
    whenever the file is still empty, which covers both a freshly created file
    and an empty file that already existed.

    Args:
        data: Values of a single result row, given in header column order.
    """
    filename = "results.csv"
    header = [
        "code_assistant_type",
        "prompt_type",
        "dataset_name",
        "report_path",
        "error_count",
        "code_messages_missing_snippets",
        "msg_count",
        "analysis_message_limit",
        "exception",
    ]
    # Append mode creates the file if needed and positions the stream at its
    # end, so a position of 0 means nothing has been written to it yet.
    with open(filename, "a", newline="") as f:
        writer = csv.writer(f, delimiter=";")
        if f.tell() == 0:
            writer.writerow(header)
        # Rest can be calculated from these formulas:
        # analyst_count = (msg_count + 1) // 2
        # code_count = msg_count // 2 + error_count
        # total_assistant_requests = msg_count + error_count
        writer.writerow(data)
def analyze(
    dataset_path: str,
    runtime: IRuntime,
    code_assistant: IAssistant,
    analysis_assistant: IAssistant,
    prompt: IPromptManager,
    analysis_message_limit: Union[int, None] = None,
    output_pdf_path: Union[str, None] = None,
) -> tuple[str, int, int]:
    """
    Conduct the automated tabular data analysis using LLM for a given dataset.

    The dataset is uploaded to the runtime and loaded into the variable
    ``df``; the conversation is then advanced step by step. Whenever the last
    message contains execution errors, the code assistant is asked to fix it,
    up to three times per step. A result row is appended through
    ``save_to_csv`` both on success and on failure.

    Args:
        dataset_path: Path of the CSV dataset. The part after the last ``/``
            is used as the file name inside the runtime.
        runtime: Runtime that stores files, executes code cells and generates
            the report.
        code_assistant: Assistant handed to the conversation for code messages.
        analysis_assistant: Assistant handed to the conversation for analysis
            messages.
        prompt: Prompt manager handed to the conversation.
        analysis_message_limit: Number of conversation steps to perform. When
            ``None``, the user is prompted before every step and the loop ends
            as soon as the typed input contains ``q``.
        output_pdf_path: Optional path whose file name, up to its first ``.``,
            becomes the report name. A timestamp is used when it is omitted or
            yields an empty name.

    Returns:
        A tuple of the generated report path, the number of execution errors
        encountered and ``conv.code_messages_missing_snippets``.

    Raises:
        CodeRetryLimitExceeded: If the code still fails once all fix attempts
            for a step are used up.
        Exception: Any other error raised while the conversation runs is
            re-raised after a result row has been recorded.
    """
    # Local import: only needed here to create the conversations directory.
    import os

    max_code_fix_attempts = 3
    timestamp_format = "%Y-%m-%d-%H-%M-%S"

    conv_list: list[Message] = []
    dataset_file_name = dataset_path.split("/")[-1]
    runtime.upload_file(dataset_path, dataset_file_name)

    # Derive the report name from the requested output path; fall back to a
    # timestamp when no path was given or the derived name is empty.
    report_name = ""
    if output_pdf_path is not None:
        report_name = str(output_pdf_path).split("/")[-1].split(".")[0]
    if report_name == "":
        report_name = datetime.now().strftime(timestamp_format)

    # ``!r`` renders the file name as a valid Python string literal, so names
    # containing quotes or backslashes do not break the generated code.
    load_dataset_code = "\n".join(
        ["import pandas as pd", f"df= pd.read_csv({dataset_file_name!r}, sep=',')"]
    )
    cell_idx = runtime.add_code(load_dataset_code)
    runtime.execute_cell(cell_idx)

    initial_message = "Dataset is loaded into the runtime in the variable 'df'.'\nYou can try to print the first 5 rows of the dataset by executing the following code: ```python\ndf.head()```"
    runtime.add_description(initial_message)
    cell_idx = runtime.add_code("df.head()")
    runtime.execute_cell(cell_idx)

    # Seed the conversation with the initial message and the df.head() output.
    conv_list.append(
        Message(
            role=ConversationRolesInternalEnum.CODE,
            content=Conversation.format_code_assistant_message(
                initial_message, runtime.get_cell_output_stream(cell_idx)
            ),
        )
    )
    print_message(conv_list[-1], Colors.PURPLE)

    conv = Conversation(runtime, code_assistant, analysis_assistant, prompt, conv_list)
    error_count = 0
    msg_count = 0

    def result_row(report_path, exception) -> list:
        # Build the results.csv row from the counters as they stand right now.
        return [
            code_assistant.__class__.__name__,
            prompt.__class__.__name__,
            dataset_file_name,
            report_path,
            error_count,
            conv.code_messages_missing_snippets,
            msg_count,
            analysis_message_limit,
            exception,
        ]

    try:
        while analysis_message_limit is None or msg_count < analysis_message_limit:
            # Interactive mode: without a limit the user decides after each step.
            if analysis_message_limit is None and "q" in input(
                f"{Colors.BOLD_BLACK.value}Press 'q' to quit or any other key to continue: {Colors.END.value}"
            ):
                break

            msg = conv.perform_next_step()
            msg_count += 1

            fixes_left = max_code_fix_attempts
            while conv.last_msg_contains_execution_errors():
                error_count += 1
                print_message(msg, Colors.RED)
                if fixes_left == 0:
                    print("Exceeded code retry limit")
                    raise CodeRetryLimitExceeded(
                        "Code assistant exceeded retry limit for code execution and could not fix the code for "
                        f"{max_code_fix_attempts} consecutive times."
                    )
                msg = conv.fix_last_code_message()
                fixes_left -= 1

            print_message(
                msg,
                Colors.PURPLE
                if msg.role == ConversationRolesInternalEnum.CODE
                else Colors.BLUE,
            )
    except Exception as e:
        # Best effort: still try to produce a report for the partial run, but
        # never let a report failure mask the original error.
        try:
            report_path = runtime.generate_report("reports", report_name)
        except Exception:
            report_path = None
        save_to_csv(result_row(report_path, e))
        # Bare raise keeps the original traceback intact.
        raise

    report_path = runtime.generate_report("reports", report_name)
    print(
        f"{Colors.BOLD_RED.value}Total number of errors: {error_count}{Colors.END.value}"
    )
    print(
        f"{Colors.BOLD_YELLOW.value}Report has been saved to {report_path}{Colors.END.value}"
    )
    print(
        f"{Colors.BOLD_BLUE.value}Code Assistant messages missing code snippets: {conv.code_messages_missing_snippets}{Colors.END.value}"
    )
    save_to_csv(result_row(report_path, None))

    conv_json = conv.get_conversation_json()
    # Make sure the target directory exists so a finished analysis is not lost
    # to a FileNotFoundError on the very last step.
    os.makedirs("conversations", exist_ok=True)
    conv_path = f"conversations/conversation-{datetime.now().strftime(timestamp_format)}.json"
    with open(conv_path, "w") as f:
        json.dump(conv_json, f, indent=4)
    print(
        f"{Colors.BOLD_YELLOW.value}Conversation has been saved to {conv_path}{Colors.END.value}"
    )
    return report_path, error_count, conv.code_messages_missing_snippets
Functions
def analyze(dataset_path: str, runtime: runtime.iruntime.IRuntime, code_assistant: llm_api.iassistant.IAssistant, analysis_assistant: llm_api.iassistant.IAssistant, prompt: prompt_manager.ipromptmanager.IPromptManager, analysis_message_limit: Optional[int] = None, output_pdf_path: str = None) ‑> tuple[str, int, int]
-
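Conducts the automated tabular data analysis of a given dataset using LLM assistants. Returns a tuple containing the path to the generated report, the number of code execution errors, and the number of code assistant messages that were missing code snippets.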
Expand source code
def analyze(
    dataset_path: str,
    runtime: IRuntime,
    code_assistant: IAssistant,
    analysis_assistant: IAssistant,
    prompt: IPromptManager,
    analysis_message_limit: Union[int, None] = None,
    output_pdf_path: Union[str, None] = None,
) -> tuple[str, int, int]:
    """
    Conduct the automated tabular data analysis using LLM for a given dataset.

    The dataset is uploaded to the runtime and loaded into the variable
    ``df``; the conversation is then advanced step by step. Whenever the last
    message contains execution errors, the code assistant is asked to fix it,
    up to three times per step. A result row is appended through
    ``save_to_csv`` both on success and on failure.

    Args:
        dataset_path: Path of the CSV dataset. The part after the last ``/``
            is used as the file name inside the runtime.
        runtime: Runtime that stores files, executes code cells and generates
            the report.
        code_assistant: Assistant handed to the conversation for code messages.
        analysis_assistant: Assistant handed to the conversation for analysis
            messages.
        prompt: Prompt manager handed to the conversation.
        analysis_message_limit: Number of conversation steps to perform. When
            ``None``, the user is prompted before every step and the loop ends
            as soon as the typed input contains ``q``.
        output_pdf_path: Optional path whose file name, up to its first ``.``,
            becomes the report name. A timestamp is used when it is omitted or
            yields an empty name.

    Returns:
        A tuple of the generated report path, the number of execution errors
        encountered and ``conv.code_messages_missing_snippets``.

    Raises:
        CodeRetryLimitExceeded: If the code still fails once all fix attempts
            for a step are used up.
        Exception: Any other error raised while the conversation runs is
            re-raised after a result row has been recorded.
    """
    # Local import: only needed here to create the conversations directory.
    import os

    max_code_fix_attempts = 3
    timestamp_format = "%Y-%m-%d-%H-%M-%S"

    conv_list: list[Message] = []
    dataset_file_name = dataset_path.split("/")[-1]
    runtime.upload_file(dataset_path, dataset_file_name)

    # Derive the report name from the requested output path; fall back to a
    # timestamp when no path was given or the derived name is empty.
    report_name = ""
    if output_pdf_path is not None:
        report_name = str(output_pdf_path).split("/")[-1].split(".")[0]
    if report_name == "":
        report_name = datetime.now().strftime(timestamp_format)

    # ``!r`` renders the file name as a valid Python string literal, so names
    # containing quotes or backslashes do not break the generated code.
    load_dataset_code = "\n".join(
        ["import pandas as pd", f"df= pd.read_csv({dataset_file_name!r}, sep=',')"]
    )
    cell_idx = runtime.add_code(load_dataset_code)
    runtime.execute_cell(cell_idx)

    initial_message = "Dataset is loaded into the runtime in the variable 'df'.'\nYou can try to print the first 5 rows of the dataset by executing the following code: ```python\ndf.head()```"
    runtime.add_description(initial_message)
    cell_idx = runtime.add_code("df.head()")
    runtime.execute_cell(cell_idx)

    # Seed the conversation with the initial message and the df.head() output.
    conv_list.append(
        Message(
            role=ConversationRolesInternalEnum.CODE,
            content=Conversation.format_code_assistant_message(
                initial_message, runtime.get_cell_output_stream(cell_idx)
            ),
        )
    )
    print_message(conv_list[-1], Colors.PURPLE)

    conv = Conversation(runtime, code_assistant, analysis_assistant, prompt, conv_list)
    error_count = 0
    msg_count = 0

    def result_row(report_path, exception) -> list:
        # Build the results.csv row from the counters as they stand right now.
        return [
            code_assistant.__class__.__name__,
            prompt.__class__.__name__,
            dataset_file_name,
            report_path,
            error_count,
            conv.code_messages_missing_snippets,
            msg_count,
            analysis_message_limit,
            exception,
        ]

    try:
        while analysis_message_limit is None or msg_count < analysis_message_limit:
            # Interactive mode: without a limit the user decides after each step.
            if analysis_message_limit is None and "q" in input(
                f"{Colors.BOLD_BLACK.value}Press 'q' to quit or any other key to continue: {Colors.END.value}"
            ):
                break

            msg = conv.perform_next_step()
            msg_count += 1

            fixes_left = max_code_fix_attempts
            while conv.last_msg_contains_execution_errors():
                error_count += 1
                print_message(msg, Colors.RED)
                if fixes_left == 0:
                    print("Exceeded code retry limit")
                    raise CodeRetryLimitExceeded(
                        "Code assistant exceeded retry limit for code execution and could not fix the code for "
                        f"{max_code_fix_attempts} consecutive times."
                    )
                msg = conv.fix_last_code_message()
                fixes_left -= 1

            print_message(
                msg,
                Colors.PURPLE
                if msg.role == ConversationRolesInternalEnum.CODE
                else Colors.BLUE,
            )
    except Exception as e:
        # Best effort: still try to produce a report for the partial run, but
        # never let a report failure mask the original error.
        try:
            report_path = runtime.generate_report("reports", report_name)
        except Exception:
            report_path = None
        save_to_csv(result_row(report_path, e))
        # Bare raise keeps the original traceback intact.
        raise

    report_path = runtime.generate_report("reports", report_name)
    print(
        f"{Colors.BOLD_RED.value}Total number of errors: {error_count}{Colors.END.value}"
    )
    print(
        f"{Colors.BOLD_YELLOW.value}Report has been saved to {report_path}{Colors.END.value}"
    )
    print(
        f"{Colors.BOLD_BLUE.value}Code Assistant messages missing code snippets: {conv.code_messages_missing_snippets}{Colors.END.value}"
    )
    save_to_csv(result_row(report_path, None))

    conv_json = conv.get_conversation_json()
    # Make sure the target directory exists so a finished analysis is not lost
    # to a FileNotFoundError on the very last step.
    os.makedirs("conversations", exist_ok=True)
    conv_path = f"conversations/conversation-{datetime.now().strftime(timestamp_format)}.json"
    with open(conv_path, "w") as f:
        json.dump(conv_json, f, indent=4)
    print(
        f"{Colors.BOLD_YELLOW.value}Conversation has been saved to {conv_path}{Colors.END.value}"
    )
    return report_path, error_count, conv.code_messages_missing_snippets
def save_to_csv(data: list) ‑> None
-
Save the results of the analysis to a CSV file.
Expand source code
def save_to_csv(data: list) -> None:
    """
    Append one row of analysis results to ``results.csv``.

    The file lives in the current working directory, uses ``;`` as the
    delimiter and is created when missing. The header row is written first
    whenever the file is still empty, which covers both a freshly created file
    and an empty file that already existed.

    Args:
        data: Values of a single result row, given in header column order.
    """
    filename = "results.csv"
    header = [
        "code_assistant_type",
        "prompt_type",
        "dataset_name",
        "report_path",
        "error_count",
        "code_messages_missing_snippets",
        "msg_count",
        "analysis_message_limit",
        "exception",
    ]
    # Append mode creates the file if needed and positions the stream at its
    # end, so a position of 0 means nothing has been written to it yet.
    with open(filename, "a", newline="") as f:
        writer = csv.writer(f, delimiter=";")
        if f.tell() == 0:
            writer.writerow(header)
        # Rest can be calculated from these formulas:
        # analyst_count = (msg_count + 1) // 2
        # code_count = msg_count // 2 + error_count
        # total_assistant_requests = msg_count + error_count
        writer.writerow(data)
Classes
class CodeRetryLimitExceeded (message='Exceeded code retry limit')
-
Exception raised when too many errors occur during code execution.
Expand source code
class CodeRetryLimitExceeded(Exception):
    """
    Signal that code execution kept failing after the allowed number of retries.

    The text given as ``message`` is stored on the instance as ``message`` and
    is also handed to ``Exception`` so that ``str(exc)`` yields the same text.
    """

    def __init__(self, message="Exceeded code retry limit"):
        # Initialise the base exception first, then expose the text as an attribute.
        super().__init__(message)
        self.message = message
Ancestors
- builtins.Exception
- builtins.BaseException