import csv import os import re def remove_links(definitions: str) -> str: definitions = re.sub(r"\[\[([^\|\]\]]*)\]\]", r"\1", definitions) definitions = re.sub(r"\[\[[^\[\[]*\|([^\|\]\]]*)\]\]", r"\1", definitions) return definitions def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None: try: with open(file_path, "r", encoding="utf-8") as f: content = f.read() word = os.path.splitext(filename)[0] content_parts = content.split("??") if len(content_parts) > 1: definitions = content_parts[1] else: raise Exception("No delimiter found") filtered_lines = [] for line in definitions.split("\n"): line = line.strip() if line.startswith(("n.", "adj.", "adv.", "v.")): filtered_lines.append(line) definitions = "\n".join(filtered_lines) definitions = remove_links(definitions) return word, definitions except Exception as e: print(f"Error reading {file_path}: {e}") return None def walk_directory(directory=".") -> list[tuple[str, str]]: """ Recursively reads all .md files in the given directory and its subdirectories. Prints the filename and contents of each file. Args: directory (str): The directory to start searching from. Defaults to current directory. """ word_definitions = [] for root, dirs, files in os.walk(directory): for file in files: if not file.endswith(".md"): continue file_path = os.path.join(root, file) result = read_markdown_file(file_path, file) if not result: continue word, definitions = result word_definitions.append((word, definitions)) return word_definitions if __name__ == "__main__": word_definitions = walk_directory("words/PartB") # Sort word_definitions alphabetically by word (first element of each tuple) word_definitions.sort(key=lambda x: x[0]) # Write to text file with open("word_definitions.txt", "w", encoding="utf-8") as textfile: # Write each word and its definitions to the text file for word, definitions in word_definitions: textfile.write(f"{word},{definitions}\n\n") print(len(word_definitions))