From a55e552d887e14080b76c5c67e7af8b50b218875 Mon Sep 17 00:00:00 2001 From: Sungchan Yi Date: Thu, 13 Mar 2025 10:31:29 +0900 Subject: [PATCH] feat: include example sentences --- run.py | 63 +++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/run.py b/run.py index 03c596d..bfe9c76 100644 --- a/run.py +++ b/run.py @@ -2,13 +2,37 @@ import csv import os import re + def remove_links(definitions: str) -> str: definitions = re.sub(r"\[\[([^\|\]\]]*)\]\]", r"\1", definitions) definitions = re.sub(r"\[\[[^\[\[]*\|([^\|\]\]]*)\]\]", r"\1", definitions) return definitions -def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None: +def replace_asterisks_with_italics(text: str) -> str: + is_odd = True + result = "" + + for char in text: + if char == "*": + if is_odd: + result += "" + else: + result += "" + is_odd = not is_odd + else: + result += char + + return result + + +def extract_example_sentences(content: str) -> str: + sentences = [line[2:].strip() for line in content.split("\n")[1:] if line.strip()] + joined = "\n".join([f"
  • {replace_asterisks_with_italics(sentence)}
  • " for sentence in sentences]) + return f"" + + +def read_markdown_file(file_path: str, filename: str) -> tuple[str, str, str] | None: try: with open(file_path, "r", encoding="utf-8") as f: content = f.read() @@ -16,11 +40,13 @@ def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None: word = os.path.splitext(filename)[0] content_parts = content.split("??") - if len(content_parts) > 1: - definitions = content_parts[1] - else: + if len(content_parts) <= 1: raise Exception("No delimiter found") + definitions = content_parts[1] + + examples = extract_example_sentences(content_parts[0]) + filtered_lines = [] for line in definitions.split("\n"): line = line.strip() @@ -29,13 +55,13 @@ def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None: definitions = "\n".join(filtered_lines) definitions = remove_links(definitions) - return word, definitions + return word, definitions, examples except Exception as e: print(f"Error reading {file_path}: {e}") return None -def walk_directory(directory=".") -> list[tuple[str, str]]: +def walk_directory(directory=".") -> list[tuple[str, str, str]]: """ Recursively reads all .md files in the given directory and its subdirectories. Prints the filename and contents of each file. @@ -54,21 +80,30 @@ def walk_directory(directory=".") -> list[tuple[str, str]]: if not result: continue - word, definitions = result - word_definitions.append((word, definitions)) + word, definitions, examples = result + word_definitions.append((word, definitions, examples)) return word_definitions if __name__ == "__main__": - word_definitions = walk_directory("words/PartB") + word_definitions = walk_directory(".") # Sort word_definitions alphabetically by word (first element of each tuple) word_definitions.sort(key=lambda x: x[0]) - # Write to text file - with open("word_definitions.txt", "w", encoding="utf-8") as textfile: - # Write each word and its definitions to the text file - for word, definitions in word_definitions: - textfile.write(f"{word},{definitions}\n\n") + # Write to CSV file + with open("word_definitions.csv", "w", encoding="utf-8", newline="") as csvfile: + csv_writer = csv.writer(csvfile) + # Write header row + # Write each word and its definitions to the CSV file + for word, definitions, examples in word_definitions: + csv_writer.writerow([word, definitions, examples]) print(len(word_definitions)) + + # print(f"Successfully wrote {len(word_definitions)} words to word_definitions.csv") + # # Write to text file + # with open("word_definitions.txt", "w", encoding="utf-8") as textfile: + # # Write each word and its definitions to the text file + # for word, definitions in word_definitions: + # textfile.write(f"{word},{definitions}\n\n")