From 6c19f783a009c7d375b57cbaebf2f7b90ae299f8 Mon Sep 17 00:00:00 2001 From: Sungchan Yi Date: Wed, 12 Mar 2025 22:50:04 +0900 Subject: [PATCH] feat: implementation finished --- .gitignore | 3 ++- run.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 run.py diff --git a/.gitignore b/.gitignore index 0dbf2f2..f186bcc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +words/* + # ---> Python # Byte-compiled / optimized / DLL files __pycache__/ @@ -167,4 +169,3 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ - diff --git a/run.py b/run.py new file mode 100644 index 0000000..03c596d --- /dev/null +++ b/run.py @@ -0,0 +1,74 @@ +import csv +import os +import re + +def remove_links(definitions: str) -> str: + definitions = re.sub(r"\[\[([^\|\]\]]*)\]\]", r"\1", definitions) + definitions = re.sub(r"\[\[[^\[\[]*\|([^\|\]\]]*)\]\]", r"\1", definitions) + return definitions + + +def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None: + try: + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + + word = os.path.splitext(filename)[0] + + content_parts = content.split("??") + if len(content_parts) > 1: + definitions = content_parts[1] + else: + raise Exception("No delimiter found") + + filtered_lines = [] + for line in definitions.split("\n"): + line = line.strip() + if line.startswith(("n.", "adj.", "adv.", "v.")): + filtered_lines.append(line) + + definitions = "\n".join(filtered_lines) + definitions = remove_links(definitions) + return word, definitions + except Exception as e: + print(f"Error reading {file_path}: {e}") + return None + + +def walk_directory(directory=".") -> list[tuple[str, str]]: + """ + Recursively reads all .md files in the given directory and its subdirectories. + Prints the filename and contents of each file. + + Args: + directory (str): The directory to start searching from. Defaults to current directory. + """ + word_definitions = [] + for root, dirs, files in os.walk(directory): + for file in files: + if not file.endswith(".md"): + continue + + file_path = os.path.join(root, file) + result = read_markdown_file(file_path, file) + if not result: + continue + + word, definitions = result + word_definitions.append((word, definitions)) + + return word_definitions + + +if __name__ == "__main__": + word_definitions = walk_directory("words/PartB") + # Sort word_definitions alphabetically by word (first element of each tuple) + word_definitions.sort(key=lambda x: x[0]) + + # Write to text file + with open("word_definitions.txt", "w", encoding="utf-8") as textfile: + # Write each word and its definitions to the text file + for word, definitions in word_definitions: + textfile.write(f"{word},{definitions}\n\n") + + print(len(word_definitions))