feat: include example sentences

2025-03-13 10:31:29 +09:00
parent 6c19f783a0
commit a55e552d88
1 changed files with 49 additions and 14 deletions
--- a/run.py
+++ b/run.py
@@ -2,13 +2,37 @@ import csv
 import os
 import re
 def remove_links(definitions: str) -> str:
    """Strip double-bracket link markup, keeping only the visible text.

    A plain link ``[[target]]`` becomes ``target``; a piped link
    ``[[target|label]]`` becomes ``label``.

    Args:
        definitions: Text that may contain double-bracket links.

    Returns:
        The text with both link forms replaced by their captured text.
    """
    # Plain links are rewritten first, then piped links. Each pattern
    # captures the text to keep in group 1.
    link_patterns = (
        r"\[\[([^\|\]\]]*)\]\]",
        r"\[\[[^\[\[]*\|([^\|\]\]]*)\]\]",
    )
    unlinked = definitions
    for pattern in link_patterns:
        unlinked = re.sub(pattern, r"\1", unlinked)
    return unlinked
-def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None:
def replace_asterisks_with_italics(text: str) -> str:
    """Replace ``*`` markers with alternating ``<i>`` / ``</i>`` HTML tags.

    The 1st, 3rd, 5th, ... asterisk becomes ``<i>`` and the 2nd, 4th, ...
    becomes ``</i>``. Every other character is copied unchanged. An
    unpaired final asterisk is not auto-closed, so the output then ends
    with an open ``<i>``.

    Args:
        text: Text using single asterisks to delimit emphasised spans.

    Returns:
        The text with each asterisk replaced by an opening or closing tag.
    """
    # Splitting on "*" yields the text between markers; marker k (0-based)
    # sits between segments k and k + 1.
    segments = text.split("*")
    pieces = [segments[0]]
    for marker_index, segment in enumerate(segments[1:]):
        pieces.append("<i>" if marker_index % 2 == 0 else "</i>")
        pieces.append(segment)
    # One join instead of per-character "+=" keeps this linear in len(text).
    return "".join(pieces)
 def extract_example_sentences(content: str) -> str:
    """Render the example lines of ``content`` as an HTML unordered list.

    The first line of ``content`` is skipped, as are blank or
    whitespace-only lines. Each remaining line has its first two
    characters dropped (presumably a "- " bullet marker; confirm against
    the source files) and is then stripped. Asterisk pairs are converted
    to ``<i>``/``</i>`` and each sentence is wrapped in ``<li>``.

    Args:
        content: The text block holding the example sentences.

    Returns:
        A ``<ul>`` element whose ``<li>`` items are joined by newlines;
        ``<ul></ul>`` when no sentence lines are present.
    """
    items = []
    for raw_line in content.split("\n")[1:]:
        if not raw_line.strip():
            continue
        sentence = raw_line[2:].strip()
        items.append(f"<li>{replace_asterisks_with_italics(sentence)}</li>")
    joined = "\n".join(items)
    return f"<ul>{joined}</ul>"
 def read_markdown_file(file_path: str, filename: str) -> tuple[str, str, str] | None:
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
@@ -16,11 +40,13 @@ def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None:
        word = os.path.splitext(filename)[0]
        content_parts = content.split("??")
-        if len(content_parts) > 1:
+        if len(content_parts) <= 1:
            definitions = content_parts[1]
        else:
            raise Exception("No delimiter found")
        definitions = content_parts[1]
        examples = extract_example_sentences(content_parts[0])
        filtered_lines = []
        for line in definitions.split("\n"):
            line = line.strip()
@@ -29,13 +55,13 @@ def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None:
        definitions = "\n".join(filtered_lines)
        definitions = remove_links(definitions)
-        return word, definitions
+        return word, definitions, examples
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None
-def walk_directory(directory=".") -> list[tuple[str, str]]:
+def walk_directory(directory=".") -> list[tuple[str, str, str]]:
    """
    Recursively reads all .md files in the given directory and its subdirectories.
    Prints the filename and contents of each file.
@@ -54,21 +80,30 @@ def walk_directory(directory=".") -> list[tuple[str, str]]:
            if not result:
                continue
-            word, definitions = result
+            word, definitions, examples = result
-            word_definitions.append((word, definitions))
+            word_definitions.append((word, definitions, examples))
    return word_definitions
 if __name__ == "__main__":
-    word_definitions = walk_directory("words/PartB")
+    word_definitions = walk_directory(".")
    # Sort word_definitions alphabetically by word (first element of each tuple)
    word_definitions.sort(key=lambda x: x[0])
-    # Write to text file
+    # Write to CSV file
-    with open("word_definitions.txt", "w", encoding="utf-8") as textfile:
+    with open("word_definitions.csv", "w", encoding="utf-8", newline="") as csvfile:
-        # Write each word and its definitions to the text file
+        csv_writer = csv.writer(csvfile)
-        for word, definitions in word_definitions:
+        # Write header row
-            textfile.write(f"{word},{definitions}\n\n")
+        # Write each word and its definitions to the CSV file
        for word, definitions, examples in word_definitions:
            csv_writer.writerow([word, definitions, examples])
    print(len(word_definitions))
    # print(f"Successfully wrote {len(word_definitions)} words to word_definitions.csv")
    # # Write to text file
    # with open("word_definitions.txt", "w", encoding="utf-8") as textfile:
    #     # Write each word and its definitions to the text file
    #     for word, definitions in word_definitions:
    #         textfile.write(f"{word},{definitions}\n\n")