Extracting Dependency Trees to Supply ChatGPT with Context
Extracting Dependency Trees to Supply ChatGPT with Context
I recently created a Python script that helps me when I'm coding with ChatGPT. This script scans a given class file and its folder, and then outputs a text file containing all the relevant code from that folder based on the dependencies found in the class file. This is especially useful when working with ChatGPT, as it allows the AI to understand local references in your code.
The script performs the following steps:
- Accepts a file and a folder as input.
- Searches the folder for all files with the same extension as the input file.
- Analyzes the text of the input file to find any words that match the name (without its extension) of a file in the folder that has the same extension.
- Recursively searches for dependencies in the found files, creating a dependency tree.
- Stops searching when the dependency tree remains unchanged between iterations.
- Outputs the dependency chain, sorted by proximity to the original file and then by name.
- Allows the user to remove irrelevant files from the list before generating the output.
Please note that this script only works for programming languages where the class name matches the file name.
The Code
import os
import re
import tkinter as tk
from tkinter import filedialog
from datetime import datetime
def get_all_files_with_ext(path, ext):
    """Recursively collect the files under ``path`` whose name ends with ``ext``.

    Args:
        path: Directory to walk; sub-directories are included.
        ext: File-name suffix to match, including the leading dot
            (for example ``".cs"``). An empty string selects only the
            files that have no extension at all.

    Returns:
        A list of paths, each built as ``os.path.join(root, name)``, in the
        order ``os.walk`` yields them.
    """
    def has_ext(name):
        if ext:
            return name.endswith(ext)
        # ``name.endswith("")`` is always True, which would silently return
        # every file in the tree; treat an empty suffix as "no extension".
        return not os.path.splitext(name)[1]

    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(path)
        for name in names
        if has_ext(name)
    ]
def filter_file(folder, file, last_extension):
    """Find the files under ``folder`` whose base name is mentioned in ``file``.

    The text of ``file`` is split into runs of word characters. Every file
    found by walking ``folder`` recursively is reported when its name without
    the extension is one of those words and its extension equals
    ``last_extension``. ``file`` itself is reported too if it lives under
    ``folder`` and mentions its own base name.

    Args:
        folder: Directory that is searched recursively for candidates.
        file: Path of the source file whose text is scanned.
        last_extension: Extension (with leading dot) a candidate must have.

    Returns:
        A list of matching paths, each built as ``os.path.join(root, name)``.
    """
    # Decode explicitly instead of relying on the platform default encoding:
    # "utf-8-sig" tolerates a leading BOM and errors="replace" keeps a single
    # undecodable byte from aborting the whole scan. Replacement characters
    # are not word characters, so they never create false matches.
    with open(file, 'r', encoding='utf-8-sig', errors='replace') as f:
        words = set(re.findall(r'\w+', f.read()))

    dependencies = []
    for root, _dirs, filenames in os.walk(folder):
        for filename in filenames:
            file_basename, file_ext = os.path.splitext(filename)
            if file_ext == last_extension and file_basename in words:
                dependencies.append(os.path.join(root, filename))
    return dependencies
def filter_files(folder, file):
    """Return every file under ``folder`` that ``file`` depends on, transitively.

    Starting from the files that ``filter_file`` reports for ``file``, each
    newly found file is scanned in turn until no scan reports a file that is
    not already known. ``file`` itself appears in the result only if one of
    the scans reports it.

    Args:
        folder: Directory searched recursively for dependency candidates.
        file: Path of the file the search starts from; its extension decides
            which files count as candidates.

    Returns:
        A sorted list of the paths of all discovered dependencies.
    """
    # splitext looks only at the final path component, so a dot in a
    # directory name or a file without any extension is handled correctly.
    input_file_ext = os.path.splitext(file)[1]

    dependencies = set(filter_file(folder, file, input_file_ext))
    # Work list of files that still have to be scanned. Each file is scanned
    # exactly once, and the loop ends as soon as nothing new turns up; a
    # comparison of "known" against "found this round" would never succeed
    # when some known file is not found again by any scan.
    pending = list(dependencies)
    while pending:
        current = pending.pop()
        for found in filter_file(folder, current, input_file_ext):
            if found not in dependencies:
                dependencies.add(found)
                pending.append(found)
    return sorted(dependencies)
def combine_cs_files(cs_files, output_file, root_path):
    """Concatenate source files into one text file, each preceded by a header.

    For every path in ``cs_files`` the output receives a ``// File: <path>``
    line (the path relative to ``root_path``), the file's text, and two
    newlines.

    Args:
        cs_files: Iterable of paths of the files to combine, in output order.
        output_file: Path of the text file to create or overwrite.
        root_path: Directory the header paths are made relative to.
    """
    # Read and write with an explicit encoding rather than the platform
    # default. "utf-8-sig" drops a leading byte-order mark so it does not end
    # up in the middle of the combined text, and errors="replace" keeps one
    # undecodable byte from aborting the whole export.
    with open(output_file, 'w', encoding='utf-8') as outfile:
        for file in cs_files:
            relative_path = os.path.relpath(file, root_path)
            outfile.write(f"// File: {relative_path}\n")
            with open(file, 'r', encoding='utf-8-sig', errors='replace') as infile:
                outfile.write(infile.read())
            outfile.write('\n\n')
def display_class_list(filtered_files, folder_path, file_path):
    """Show a scrollable checklist of files and return the ones left checked.

    Every entry of ``filtered_files`` gets a check box that starts checked
    and is labelled with its path relative to ``folder_path``. The call
    blocks until the window is closed; files are collected only when the
    "Submit" button is pressed.

    Args:
        filtered_files: List of file paths to offer. It is sorted in place:
            ``file_path`` first, then by ``folder_distance`` from it, then
            files directly in ``folder_path`` before files in sub-folders,
            then case-insensitively by relative path.
        folder_path: Folder the displayed paths are made relative to.
        file_path: The file the dependency search started from.

    Returns:
        A list with the paths whose boxes were checked on submit; empty if
        the window was closed without submitting.
    """
    selected_files = []

    def on_submit():
        # An unchecked box holds the off-value "" and is therefore skipped.
        selected_files.extend(var.get() for var in file_vars if var.get())
        window.destroy()

    def _on_mousewheel(event):
        # Scale the reported wheel delta down by 120 to get scroll units.
        canvas.yview_scroll(-1 * int(event.delta / 120), "units")

    def _on_button4(event):
        canvas.yview_scroll(-1, "units")

    def _on_button5(event):
        canvas.yview_scroll(1, "units")

    window = tk.Toplevel()
    window.title("Select Files to Combine")
    frame = tk.Frame(window)
    frame.pack()
    canvas = tk.Canvas(frame)
    scrollbar = tk.Scrollbar(frame, orient="vertical", command=canvas.yview)
    scrollable_frame = tk.Frame(canvas)

    # Keep the scrollable area in sync with the size of the inner frame.
    # An empty event sequence is rejected by Tk, so the event name must be
    # spelled out.
    scrollable_frame.bind(
        "<Configure>",
        lambda e: canvas.configure(scrollregion=canvas.bbox("all"))
    )
    canvas.create_window((0, 0), window=scrollable_frame, anchor="nw")
    canvas.configure(yscrollcommand=scrollbar.set)

    # Bind mouse wheel events for scrolling: <MouseWheel> carries a delta,
    # while <Button-4>/<Button-5> are the wheel-up/wheel-down button events
    # used on X11.
    canvas.bind_all("<MouseWheel>", _on_mousewheel)
    canvas.bind_all("<Button-4>", _on_button4)
    canvas.bind_all("<Button-5>", _on_button5)

    start_relative = os.path.relpath(file_path, folder_path)

    def sort_key(path):
        relative = os.path.relpath(path, folder_path)
        return (
            relative != start_relative,
            folder_distance(file_path, folder_path, path),
            '/' in relative or '\\' in relative,
            relative.lower(),
        )

    # One sort is enough; sorting again for every element gives the same
    # order at many times the cost.
    filtered_files.sort(key=sort_key)

    file_vars = []
    for file in filtered_files:
        relative_path = os.path.relpath(file, folder_path)
        # Files directly in the folder get a leading backslash so every
        # label contains a path separator.
        if '/' not in relative_path and '\\' not in relative_path:
            relative_path = '\\' + relative_path
        # The variable starts at the on-value, i.e. the box starts checked.
        file_var = tk.StringVar(value=file)
        chk = tk.Checkbutton(scrollable_frame, text=relative_path, variable=file_var, onvalue=file, offvalue="")
        chk.pack(anchor="w")
        file_vars.append(file_var)

    canvas.pack(side="left", fill="both", expand=True)
    scrollbar.pack(side="right", fill="y")
    submit_button = tk.Button(window, text="Submit", command=on_submit)
    submit_button.pack()
    window.wait_window()
    return selected_files
def folder_distance(input_file, base_folder, other_file):
    """Score how far the folder of ``other_file`` is from that of ``input_file``.

    Both folders are expressed relative to ``base_folder`` and compared
    component by component. Every level that has to be climbed from the
    input file's folder up to the deepest shared folder costs 1.1, and every
    level descended from there to the other file's folder costs 1.

    Args:
        input_file: Path of the reference file.
        base_folder: Folder both paths are made relative to.
        other_file: Path of the file whose distance is wanted.

    Returns:
        The distance as a number; 0 when both files are in the same folder.
    """
    def folder_parts(path):
        relative = os.path.relpath(os.path.dirname(path), base_folder)
        # relpath yields "." for base_folder itself. That is not a real
        # level and must not be counted, otherwise a parent folder scores
        # the same as a sibling folder.
        return [part for part in relative.split(os.path.sep) if part != os.curdir]

    input_folder_parts = folder_parts(input_file)
    other_folder_parts = folder_parts(other_file)

    shared_count = 0
    for input_part, other_part in zip(input_folder_parts, other_folder_parts):
        if input_part != other_part:
            break
        shared_count += 1

    levels_up = len(input_folder_parts) - shared_count
    levels_down = len(other_folder_parts) - shared_count
    # NOTE(review): the 1.1 weight presumably ranks going up slightly behind
    # going down at equal step counts; confirm the intent.
    return levels_up * 1.1 + levels_down
def main():
    """Run the interactive tool: pick a folder and a file, export the result.

    Asks for a folder and a starting file, collects the starting file's
    dependencies with ``filter_files``, lets the user deselect entries with
    ``display_class_list``, and writes the remaining files into one text
    file next to this script. A message is printed and the function returns
    early whenever a step yields nothing.
    """
    # A hidden root window is required for the dialogs and the Toplevel.
    root = tk.Tk()
    root.withdraw()
    try:
        folder_path = filedialog.askdirectory(title="Select Folder with .cs Files")
        if not folder_path:
            print("No folder selected, exiting.")
            return

        file_path = filedialog.askopenfilename(title="Select the .cs File to Start Filtering")
        if not file_path:
            print("No file selected, exiting.")
            return

        filtered_files = filter_files(folder_path, file_path)
        if not filtered_files:
            print("No .cs files found after filtering, exiting.")
            return

        user_selected_files = display_class_list(filtered_files, folder_path, file_path)
        if not user_selected_files:
            print("No files selected, exiting.")
            return

        # The output lands next to the script; its name records the
        # starting file, the folder and the time of the export.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        file_base, _ = os.path.splitext(os.path.basename(file_path))
        folder_name = os.path.basename(folder_path)
        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        output_file = os.path.join(script_dir, f'{file_base}_from_{folder_name}_at_{timestamp}_combined_files.txt')

        combine_cs_files(user_selected_files, output_file, folder_path)
        print(f"Combined .cs files have been saved to: {output_file}")
    finally:
        # Release the hidden root window on every exit path.
        root.destroy()
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
I developed this script using ChatGPT, which generated the initial Python script. I then asked ChatGPT to make various modifications to the script, such as adding a user interface. The result is a handy tool that streamlines my coding process when working with ChatGPT.
You can find a copy of the script in this GitHub Gist.
Comments
Post a Comment