From 6c19f783a009c7d375b57cbaebf2f7b90ae299f8 Mon Sep 17 00:00:00 2001
From: Sungchan Yi <calofmijuck@snu.ac.kr>
Date: Wed, 12 Mar 2025 22:50:04 +0900
Subject: [PATCH] feat: implementation finished

---
 .gitignore |  3 ++-
 run.py     | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 run.py

diff --git a/.gitignore b/.gitignore
index 0dbf2f2..f186bcc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+words/*
+
 # ---> Python
 # Byte-compiled / optimized / DLL files
 __pycache__/
@@ -167,4 +169,3 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
-
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..03c596d
--- /dev/null
+++ b/run.py
@@ -0,0 +1,74 @@
+import csv
+import os
+import re
+
+def remove_links(definitions: str) -> str:
+    """Strip wiki links: ``[[x]]`` -> ``x``, then ``[[target|alias]]`` -> ``alias``."""
+    unwrapped = re.sub(r"\[\[([^\|\]\]]*)\]\]", r"\1", definitions)
+    return re.sub(r"\[\[[^\[\[]*\|([^\|\]\]]*)\]\]", r"\1", unwrapped)
+
+
+def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None:
+    """Extract the headword and its part-of-speech definition lines from a note.
+
+    The headword is ``filename`` without its extension. Only the text after the
+    first ``??`` delimiter (up to the next one, if any) is scanned; lines there
+    that start with ``n.``, ``adj.``, ``adv.`` or ``v.`` are kept and have their
+    ``[[...]]`` links unwrapped by ``remove_links``.
+
+    Returns ``(word, definitions)``; on any error (unreadable file, missing
+    delimiter, ...) prints a message and returns ``None``.
+    """
+    pos_prefixes = ("n.", "adj.", "adv.", "v.")
+    try:
+        with open(file_path, "r", encoding="utf-8") as handle:
+            text = handle.read()
+        word = os.path.splitext(filename)[0]
+        sections = text.split("??")
+        if len(sections) < 2:
+            raise Exception("No delimiter found")
+        stripped = (raw.strip() for raw in sections[1].split("\n"))
+        kept = [entry for entry in stripped if entry.startswith(pos_prefixes)]
+        return word, remove_links("\n".join(kept))
+    except Exception as err:
+        print(f"Error reading {file_path}: {err}")
+        return None
+
+
+def walk_directory(directory: str = ".") -> list[tuple[str, str]]:
+    """
+    Recursively collects word definitions from every .md file under a directory.
+
+    Each .md file is parsed with ``read_markdown_file``; files it cannot parse
+    (it returns None for those) are skipped.
+
+    Args:
+        directory (str): Directory to search. Defaults to the current directory.
+
+    Returns:
+        list[tuple[str, str]]: One ``(word, definitions)`` pair per parsed file,
+            in the order ``os.walk`` visits them (not sorted).
+    """
+    word_definitions = []
+    for root, _dirs, files in os.walk(directory):
+        for name in files:
+            if not name.endswith(".md"):
+                continue
+            result = read_markdown_file(os.path.join(root, name), name)
+            if result is not None:
+                word_definitions.append(result)
+    return word_definitions
+
+
+if __name__ == "__main__":
+    # Gather the (word, definitions) pairs and order them by word, the first
+    # element of each tuple.
+    word_definitions = sorted(walk_directory("words/PartB"), key=lambda pair: pair[0])
+
+    # Each record is "word,definitions" followed by a blank line.
+    records = (f"{word},{definitions}\n\n" for word, definitions in word_definitions)
+    with open("word_definitions.txt", "w", encoding="utf-8") as textfile:
+        textfile.writelines(records)
+
+    # Report how many words were exported.
+    print(len(word_definitions))