From a55e552d887e14080b76c5c67e7af8b50b218875 Mon Sep 17 00:00:00 2001
From: Sungchan Yi <calofmijuck@snu.ac.kr>
Date: Thu, 13 Mar 2025 10:31:29 +0900
Subject: [PATCH] feat: include example sentences

---
 run.py | 63 +++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 49 insertions(+), 14 deletions(-)
diff --git a/run.py b/run.py
index 03c596d..bfe9c76 100644
--- a/run.py
+++ b/run.py
@@ -2,13 +2,37 @@ import csv
 import os
 import re
 
+
 def remove_links(definitions: str) -> str:
     definitions = re.sub(r"\[\[([^\|\]\]]*)\]\]", r"\1", definitions)
     definitions = re.sub(r"\[\[[^\[\[]*\|([^\|\]\]]*)\]\]", r"\1", definitions)
     return definitions
 
 
-def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None:
+def replace_asterisks_with_italics(text: str) -> str:
+    """Convert each ``*...*`` pair in ``text`` into ``<i>...</i>``.
+
+    Asterisks are paired left to right. When the count is odd, the final
+    unpaired ``*`` is kept literally so that no ``<i>`` tag is left unclosed
+    in the generated HTML.
+    """
+    pieces = text.split("*")
+    # An even number of pieces means an odd number of asterisks: glue the
+    # last two pieces back together around a literal "*".
+    if len(pieces) % 2 == 0:
+        pieces[-2:] = [pieces[-2] + "*" + pieces[-1]]
+    # Pieces at odd indexes sat between a matched pair of asterisks.
+    wrapped = [f"<i>{p}</i>" if i % 2 else p for i, p in enumerate(pieces)]
+    return "".join(wrapped)
+
+
+def extract_example_sentences(content: str) -> str:
+    """Build an HTML <ul> from ``content``: skip line 1 and blank lines, drop each line's first two characters."""
+    items = (f"<li>{replace_asterisks_with_italics(row[2:].strip())}</li>" for row in content.split("\n")[1:] if row.strip())
+    return "<ul>" + "\n".join(items) + "</ul>"
+
+
+def read_markdown_file(file_path: str, filename: str) -> tuple[str, str, str] | None:
     try:
         with open(file_path, "r", encoding="utf-8") as f:
             content = f.read()
@@ -16,11 +40,13 @@ def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None:
         word = os.path.splitext(filename)[0]
 
         content_parts = content.split("??")
-        if len(content_parts) > 1:
-            definitions = content_parts[1]
-        else:
+        if len(content_parts) <= 1:
             raise Exception("No delimiter found")
 
+        definitions = content_parts[1]
+
+        examples = extract_example_sentences(content_parts[0])
+
         filtered_lines = []
         for line in definitions.split("\n"):
             line = line.strip()
@@ -29,13 +55,13 @@ def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None:
 
         definitions = "\n".join(filtered_lines)
         definitions = remove_links(definitions)
-        return word, definitions
+        return word, definitions, examples
     except Exception as e:
         print(f"Error reading {file_path}: {e}")
         return None
 
 
-def walk_directory(directory=".") -> list[tuple[str, str]]:
+def walk_directory(directory=".") -> list[tuple[str, str, str]]:
     """
     Recursively reads all .md files in the given directory and its subdirectories.
     Prints the filename and contents of each file.
@@ -54,21 +80,30 @@ def walk_directory(directory=".") -> list[tuple[str, str]]:
             if not result:
                 continue
 
-            word, definitions = result
-            word_definitions.append((word, definitions))
+            word, definitions, examples = result
+            word_definitions.append((word, definitions, examples))
 
     return word_definitions
 
 
 if __name__ == "__main__":
-    word_definitions = walk_directory("words/PartB")
+    word_definitions = walk_directory(".")
     # Sort word_definitions alphabetically by word (first element of each tuple)
     word_definitions.sort(key=lambda x: x[0])
 
-    # Write to text file
-    with open("word_definitions.txt", "w", encoding="utf-8") as textfile:
-        # Write each word and its definitions to the text file
-        for word, definitions in word_definitions:
-            textfile.write(f"{word},{definitions}\n\n")
+    # Write to CSV file
+    with open("word_definitions.csv", "w", encoding="utf-8", newline="") as csvfile:
+        csv_writer = csv.writer(csvfile)
+        # No header row is written: the file contains data rows only
+        # Write each word with its definitions and example sentences to the CSV file
+        for word, definitions, examples in word_definitions:
+            csv_writer.writerow([word, definitions, examples])
 
     print(len(word_definitions))
+
+    # print(f"Successfully wrote {len(word_definitions)} words to word_definitions.csv")
+    # # Write to text file
+    # with open("word_definitions.txt", "w", encoding="utf-8") as textfile:
+    #     # Write each word and its definitions to the text file
+    #     for word, definitions in word_definitions:
+    #         textfile.write(f"{word},{definitions}\n\n")