Extracting Dependency Trees to Supply ChatGPT with Context

Extracting Dependency Trees to Supply ChatGPT with Context

I recently created a Python script that helps me when I'm coding with ChatGPT. This script scans a given class file and its folder, and then outputs a text file containing all the relevant code from that folder based on the dependencies found in the class file. This is especially useful when working with ChatGPT, as it allows the AI to understand local references in your code.

The script performs the following steps:

  1. Accepts a file and a folder as input.
  2. Searches the folder for all files with the same extension as the input file.
  3. Analyzes the text of the input file to find any words that match the base name (the file name without its extension) of a file in the folder.
  4. Recursively searches for dependencies in the found files, creating a dependency tree.
  5. Stops searching when the dependency tree remains unchanged between iterations.
  6. Outputs the dependency chain, sorted by proximity to the original file and then by name.
  7. Allows the user to remove irrelevant files from the list before generating the output.

Please note that this script only works for programming languages where the class name matches the file name.

The Code


import os
import re
import tkinter as tk
from tkinter import filedialog
from datetime import datetime

def get_all_files_with_ext(path, ext):
    """Collect every file beneath ``path`` whose name ends with ``ext``.

    Args:
        path: Directory that is walked recursively with ``os.walk``.
        ext: Suffix compared against each file name with ``str.endswith``
            (for example ``".cs"``).

    Returns:
        A list of paths, each built by joining the directory currently being
        walked with the matching file name, in ``os.walk`` order.
    """
    return [
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(path)
        for name in names
        if name.endswith(ext)
    ]

def filter_file(folder, file, last_extension):
    """Find files under ``folder`` whose base name appears as a word in ``file``.

    The text of ``file`` is split into ``\\w+`` tokens. Every file found while
    walking ``folder`` is reported when its extension equals
    ``last_extension`` and its base name (name without extension) is one of
    those tokens.

    Args:
        folder: Directory walked recursively for candidate files.
        file: Path of the file whose text is scanned for references.
        last_extension: Extension, including the leading dot, that a
            candidate must have (compared with ``os.path.splitext`` output).

    Returns:
        A list of matching paths, each joined from the walked directory and
        the candidate file name, in ``os.walk`` order.
    """
    with open(file, 'r') as source:
        tokens = set(re.findall(r'\w+', source.read()))

    matches = []
    for current_dir, _subdirs, candidates in os.walk(folder):
        for candidate in candidates:
            stem, suffix = os.path.splitext(candidate)
            # Skip anything with the wrong extension or an unreferenced name.
            if suffix != last_extension or stem not in tokens:
                continue
            matches.append(os.path.join(current_dir, candidate))
    return matches

def filter_files(folder, file):
    """Return every file under ``folder`` that ``file`` depends on, transitively.

    Starting from ``file``, each scanned file is passed to ``filter_file`` to
    find the files it references; newly discovered files are scanned in turn
    until no new file appears.

    Args:
        folder: Directory searched for dependency candidates.
        file: Path of the file the search starts from. Its extension decides
            which files in ``folder`` are considered.

    Returns:
        A list of unique dependency paths (as produced by ``filter_file``) in
        discovery order: direct dependencies first, then their dependencies,
        and so on. ``file`` itself is included only if some scanned file
        references it.
    """
    # splitext is what filter_file uses to split candidate names, so derive
    # the extension the same way (also correct when a directory name in the
    # path contains a dot but the file has no extension).
    input_file_ext = os.path.splitext(file)[1]

    dependencies = []
    seen = set()

    # Breadth-first traversal. Tracking visited paths guarantees termination
    # even when a dependency is not referenced by any scanned file (itself
    # included), and means each discovered file is read exactly once.
    frontier = [file]
    while frontier:
        next_frontier = []
        for current in frontier:
            for dependency in filter_file(folder, current, input_file_ext):
                if dependency in seen:
                    continue
                seen.add(dependency)
                dependencies.append(dependency)
                next_frontier.append(dependency)
        frontier = next_frontier

    return dependencies


def combine_cs_files(cs_files, output_file, root_path):
    """Concatenate the given files into ``output_file`` with a header per file.

    For each path in ``cs_files`` a ``// File: <relative path>`` line is
    written, followed by the file's full text and two newline characters.

    Args:
        cs_files: Iterable of file paths to concatenate, in output order.
        output_file: Path of the text file to create or overwrite.
        root_path: Directory the header paths are made relative to.
    """
    with open(output_file, 'w') as combined:
        for source_path in cs_files:
            combined.write(f"// File: {os.path.relpath(source_path, root_path)}\n")
            with open(source_path, 'r') as source:
                body = source.read()
                combined.write(body)
                combined.write('\n\n')

def display_class_list(filtered_files, folder_path, file_path):
    """Show a scrollable checklist of files and return the ones left checked.

    A ``Toplevel`` window lists every entry of ``filtered_files`` as a
    checkbox (initially checked) labelled with its path relative to
    ``folder_path``. The call blocks until the window is closed.

    Args:
        filtered_files: List of file paths to offer. It is sorted in place:
            the starting file first, then by ``folder_distance`` from
            ``file_path``, then files directly in ``folder_path`` before
            files in subfolders, then by lower-cased relative path.
        folder_path: Base folder used for relative paths and distances.
        file_path: The file the dependency search started from.

    Returns:
        The paths whose checkbox was checked when "Submit" was pressed. An
        empty list if the window was closed without pressing "Submit".
    """
    def on_submit():
        # An unchecked box holds the offvalue "", which is filtered out here.
        selected_files.extend([file_var.get() for file_var in file_vars if file_var.get()])
        window.destroy()

    def _on_mousewheel(event):
        canvas.yview_scroll(-1 * int(event.delta / 120), "units")

    def _on_button4(event):
        canvas.yview_scroll(-1, "units")

    def _on_button5(event):
        canvas.yview_scroll(1, "units")

    selected_files = []
    window = tk.Toplevel()
    window.title("Select Files to Combine")

    frame = tk.Frame(window)
    frame.pack()

    canvas = tk.Canvas(frame)
    scrollbar = tk.Scrollbar(frame, orient="vertical", command=canvas.yview)
    scrollable_frame = tk.Frame(canvas)

    # Keep the scrollable region in sync with the inner frame whenever its
    # size changes (e.g. as checkboxes are added).
    scrollable_frame.bind(
        "<Configure>",
        lambda e: canvas.configure(scrollregion=canvas.bbox("all"))
    )

    canvas.create_window((0, 0), window=scrollable_frame, anchor="nw")
    canvas.configure(yscrollcommand=scrollbar.set)

    # Bind mouse wheel events for scrolling. <MouseWheel> carries a delta;
    # <Button-4>/<Button-5> are the wheel-up/wheel-down events on X11.
    canvas.bind_all("<MouseWheel>", _on_mousewheel)
    canvas.bind_all("<Button-4>", _on_button4)
    canvas.bind_all("<Button-5>", _on_button5)

    start_relative = os.path.relpath(file_path, folder_path)

    def sort_key(path):
        relative = os.path.relpath(path, folder_path)
        return (
            relative != start_relative,  # the starting file sorts first
            folder_distance(file_path, folder_path, path),
            '/' in relative or '\\' in relative,  # top-level files first
            relative.lower(),
        )

    # Sort once, in place (the caller's list is reordered).
    filtered_files.sort(key=sort_key)

    file_vars = []
    for file in filtered_files:
        relative_path = os.path.relpath(file, folder_path)

        # Check if the relative path contains a slash
        if '/' not in relative_path and '\\' not in relative_path:
            # Add a slash at the beginning if it doesn't
            relative_path = '\\' + relative_path

        # The variable starts at the onvalue, so every box begins checked.
        file_var = tk.StringVar(value=file)
        chk = tk.Checkbutton(scrollable_frame, text=relative_path, variable=file_var, onvalue=file, offvalue="")
        chk.pack(anchor="w")
        file_vars.append(file_var)

    canvas.pack(side="left", fill="both", expand=True)
    scrollbar.pack(side="right", fill="y")

    submit_button = tk.Button(window, text="Submit", command=on_submit)
    submit_button.pack()

    # Block until the window is destroyed (Submit or window close).
    window.wait_window()
    return selected_files

def folder_distance(input_file, base_folder, other_file):
    """Score how far ``other_file``'s folder is from ``input_file``'s folder.

    Both folders are expressed relative to ``base_folder`` and compared
    component by component. Each level that must be climbed from the input
    file's folder to the deepest shared folder costs 1.1; each level then
    descended to reach the other file's folder costs 1. Weighting upward
    steps slightly higher ranks subfolders of the input folder ahead of
    equally distant parent or sibling folders.

    Args:
        input_file: Path of the reference file.
        base_folder: Folder both paths are made relative to.
        other_file: Path of the file being scored.

    Returns:
        The distance as a number; 0 when both files are in the same folder.
    """
    def folder_parts(path):
        relative = os.path.relpath(os.path.dirname(path), base_folder)
        # relpath yields "." when the folder *is* base_folder. That is zero
        # components, not one; counting it as a directory would overstate
        # the distance between the base folder and everything below it.
        if relative == os.curdir:
            return []
        return relative.split(os.path.sep)

    input_folder_parts = folder_parts(input_file)
    other_folder_parts = folder_parts(other_file)

    # Count the leading components shared by both folders.
    shared = 0
    for input_part, other_part in zip(input_folder_parts, other_folder_parts):
        if input_part != other_part:
            break
        shared += 1

    levels_up = len(input_folder_parts) - shared
    levels_down = len(other_folder_parts) - shared
    return levels_up * 1.1 + levels_down


def main():
    """Run the interactive workflow: pick a folder and file, combine the result.

    Asks for a folder and a starting file, computes the starting file's
    dependencies inside that folder with ``filter_files``, lets the user
    untick unwanted files via ``display_class_list``, and writes the chosen
    files into one timestamped text file next to this script using
    ``combine_cs_files``. Prints a message and returns early whenever a step
    yields nothing to work with.
    """
    root = tk.Tk()
    root.withdraw()  # only the dialogs should be visible, not the empty root

    try:
        folder_path = filedialog.askdirectory(title="Select Folder with .cs Files")
        if not folder_path:
            print("No folder selected, exiting.")
            return

        file_path = filedialog.askopenfilename(title="Select the .cs File to Start Filtering")
        if not file_path:
            print("No file selected, exiting.")
            return

        filtered_files = filter_files(folder_path, file_path)

        if not filtered_files:
            print("No .cs files found after filtering, exiting.")
            return

        user_selected_files = display_class_list(filtered_files, folder_path, file_path)

        if not user_selected_files:
            print("No files selected, exiting.")
            return

        script_dir = os.path.dirname(os.path.abspath(__file__))
        # normpath drops a trailing separator, which would otherwise make
        # basename return an empty folder name.
        folder_name = os.path.basename(os.path.normpath(folder_path))
        file_base, _ = os.path.splitext(os.path.basename(file_path))
        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

        output_file = os.path.join(script_dir, f'{file_base}_from_{folder_name}_at_{timestamp}_combined_files.txt')

        combine_cs_files(user_selected_files, output_file, folder_path)
        print(f"Combined .cs files have been saved to: {output_file}")
    finally:
        # Release the hidden root window on every exit path.
        root.destroy()

# Start the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
	

I developed this script using ChatGPT, which generated the initial Python script. I then asked ChatGPT to make various modifications to the script, such as adding a user interface. The result is a handy tool that streamlines my coding process when working with ChatGPT.

You can find a copy of the script in this GitHub Gist.

Comments

Popular posts from this blog

Using Kanban Boards to Stay Organized and to Stay Motivated

Importance over Immediacy

Low Level Design Part 1: Reading and Gathering Information