feat: include example sentences

This commit is contained in:
2025-03-13 10:31:29 +09:00
parent 6c19f783a0
commit a55e552d88

63
run.py
View File

@@ -2,13 +2,37 @@ import csv
import os import os
import re import re
def remove_links(definitions: str) -> str:
    """Strip wiki-style ``[[...]]`` link markup, keeping only the visible text.

    ``[[target]]`` is reduced to ``target`` and ``[[target|label]]`` is
    reduced to ``label``. Text outside link markup is returned unchanged.
    """
    # Applied in sequence: plain links first, then links carrying a "|" label.
    link_patterns = (
        r"\[\[([^\|\]\]]*)\]\]",
        r"\[\[[^\[\[]*\|([^\|\]\]]*)\]\]",
    )
    cleaned = definitions
    for pattern in link_patterns:
        cleaned = re.sub(pattern, r"\1", cleaned)
    return cleaned
def replace_asterisks_with_italics(text: str) -> str:
    """Convert paired ``*`` markers in ``text`` into ``<i>``/``</i>`` tags.

    Asterisks are paired left to right: the first of each pair becomes
    ``<i>`` and the second becomes ``</i>``. If the text contains an odd
    number of asterisks, the final one has no partner and is kept as a
    literal ``*`` so the returned markup never contains an unclosed ``<i>``.

    Args:
        text: Source text using ``*...*`` for emphasis.

    Returns:
        The text with every complete ``*...*`` pair rewritten as
        ``<i>...</i>``.
    """
    segments = text.split("*")

    # N asterisks yield N + 1 segments, so an even segment count means an odd
    # number of asterisks. Re-attach the unpaired last one as a literal.
    if len(segments) % 2 == 0:
        segments[-2:] = ["*".join(segments[-2:])]

    pieces = [segments[0]]
    for index, segment in enumerate(segments[1:], start=1):
        # Odd boundaries open an italic run, even boundaries close it.
        pieces.append("<i>" if index % 2 else "</i>")
        pieces.append(segment)
    return "".join(pieces)
def extract_example_sentences(content: str) -> str:
    """Render the example-sentence lines of ``content`` as an HTML ``<ul>``.

    The first line of ``content`` is skipped, as are lines that are empty or
    whitespace-only. Each remaining line has its first two characters dropped
    (presumably a Markdown bullet marker such as "- "; confirm against the
    input files), is stripped of surrounding whitespace, and is passed through
    ``replace_asterisks_with_italics`` before being wrapped in ``<li>``.

    Returns:
        A ``<ul>...</ul>`` string whose ``<li>`` items are joined by newlines;
        ``<ul></ul>`` when no sentence lines are found.
    """
    items = []
    for raw_line in content.split("\n")[1:]:
        if not raw_line.strip():
            continue
        sentence = raw_line[2:].strip()
        items.append(f"<li>{replace_asterisks_with_italics(sentence)}</li>")
    body = "\n".join(items)
    return f"<ul>{body}</ul>"
def read_markdown_file(file_path: str, filename: str) -> tuple[str, str, str] | None:
try: try:
with open(file_path, "r", encoding="utf-8") as f: with open(file_path, "r", encoding="utf-8") as f:
content = f.read() content = f.read()
@@ -16,11 +40,13 @@ def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None:
word = os.path.splitext(filename)[0] word = os.path.splitext(filename)[0]
content_parts = content.split("??") content_parts = content.split("??")
if len(content_parts) > 1: if len(content_parts) <= 1:
definitions = content_parts[1]
else:
raise Exception("No delimiter found") raise Exception("No delimiter found")
definitions = content_parts[1]
examples = extract_example_sentences(content_parts[0])
filtered_lines = [] filtered_lines = []
for line in definitions.split("\n"): for line in definitions.split("\n"):
line = line.strip() line = line.strip()
@@ -29,13 +55,13 @@ def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None:
definitions = "\n".join(filtered_lines) definitions = "\n".join(filtered_lines)
definitions = remove_links(definitions) definitions = remove_links(definitions)
return word, definitions return word, definitions, examples
except Exception as e: except Exception as e:
print(f"Error reading {file_path}: {e}") print(f"Error reading {file_path}: {e}")
return None return None
def walk_directory(directory=".") -> list[tuple[str, str]]: def walk_directory(directory=".") -> list[tuple[str, str, str]]:
""" """
Recursively reads all .md files in the given directory and its subdirectories. Recursively reads all .md files in the given directory and its subdirectories.
Prints the filename and contents of each file. Prints the filename and contents of each file.
@@ -54,21 +80,30 @@ def walk_directory(directory=".") -> list[tuple[str, str]]:
if not result: if not result:
continue continue
word, definitions = result word, definitions, examples = result
word_definitions.append((word, definitions)) word_definitions.append((word, definitions, examples))
return word_definitions return word_definitions
if __name__ == "__main__": if __name__ == "__main__":
word_definitions = walk_directory("words/PartB") word_definitions = walk_directory(".")
# Sort word_definitions alphabetically by word (first element of each tuple) # Sort word_definitions alphabetically by word (first element of each tuple)
word_definitions.sort(key=lambda x: x[0]) word_definitions.sort(key=lambda x: x[0])
# Write to text file # Write to CSV file
with open("word_definitions.txt", "w", encoding="utf-8") as textfile: with open("word_definitions.csv", "w", encoding="utf-8", newline="") as csvfile:
# Write each word and its definitions to the text file csv_writer = csv.writer(csvfile)
for word, definitions in word_definitions: # Write header row
textfile.write(f"{word},{definitions}\n\n") # Write each word and its definitions to the CSV file
for word, definitions, examples in word_definitions:
csv_writer.writerow([word, definitions, examples])
print(len(word_definitions)) print(len(word_definitions))
# print(f"Successfully wrote {len(word_definitions)} words to word_definitions.csv")
# # Write to text file
# with open("word_definitions.txt", "w", encoding="utf-8") as textfile:
# # Write each word and its definitions to the text file
# for word, definitions in word_definitions:
# textfile.write(f"{word},{definitions}\n\n")