diff --git a/run.py b/run.py
index 03c596d..bfe9c76 100644
--- a/run.py
+++ b/run.py
@@ -2,13 +2,37 @@ import csv
import os
import re
+
def remove_links(definitions: str) -> str:
definitions = re.sub(r"\[\[([^\|\]\]]*)\]\]", r"\1", definitions)
definitions = re.sub(r"\[\[[^\[\[]*\|([^\|\]\]]*)\]\]", r"\1", definitions)
return definitions
-def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None:
+def replace_asterisks_with_italics(text: str) -> str:
+ is_odd = True
+ result = ""
+
+ for char in text:
+ if char == "*":
+ if is_odd:
+ result += ""
+ else:
+ result += ""
+ is_odd = not is_odd
+ else:
+ result += char
+
+ return result
+
+
+def extract_example_sentences(content: str) -> str:
+ sentences = [line[2:].strip() for line in content.split("\n")[1:] if line.strip()]
+ joined = "\n".join([f"
{replace_asterisks_with_italics(sentence)}" for sentence in sentences])
+ return f""
+
+
+def read_markdown_file(file_path: str, filename: str) -> tuple[str, str, str] | None:
try:
with open(file_path, "r", encoding="utf-8") as f:
content = f.read()
@@ -16,11 +40,13 @@ def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None:
word = os.path.splitext(filename)[0]
content_parts = content.split("??")
- if len(content_parts) > 1:
- definitions = content_parts[1]
- else:
+ if len(content_parts) <= 1:
raise Exception("No delimiter found")
+ definitions = content_parts[1]
+
+ examples = extract_example_sentences(content_parts[0])
+
filtered_lines = []
for line in definitions.split("\n"):
line = line.strip()
@@ -29,13 +55,13 @@ def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None:
definitions = "\n".join(filtered_lines)
definitions = remove_links(definitions)
- return word, definitions
+ return word, definitions, examples
except Exception as e:
print(f"Error reading {file_path}: {e}")
return None
-def walk_directory(directory=".") -> list[tuple[str, str]]:
+def walk_directory(directory=".") -> list[tuple[str, str, str]]:
"""
Recursively reads all .md files in the given directory and its subdirectories.
Prints the filename and contents of each file.
@@ -54,21 +80,30 @@ def walk_directory(directory=".") -> list[tuple[str, str]]:
if not result:
continue
- word, definitions = result
- word_definitions.append((word, definitions))
+ word, definitions, examples = result
+ word_definitions.append((word, definitions, examples))
return word_definitions
if __name__ == "__main__":
- word_definitions = walk_directory("words/PartB")
+ word_definitions = walk_directory(".")
# Sort word_definitions alphabetically by word (first element of each tuple)
word_definitions.sort(key=lambda x: x[0])
- # Write to text file
- with open("word_definitions.txt", "w", encoding="utf-8") as textfile:
- # Write each word and its definitions to the text file
- for word, definitions in word_definitions:
- textfile.write(f"{word},{definitions}\n\n")
+ # Write to CSV file
+ with open("word_definitions.csv", "w", encoding="utf-8", newline="") as csvfile:
+ csv_writer = csv.writer(csvfile)
+ # Write header row
+ # Write each word and its definitions to the CSV file
+ for word, definitions, examples in word_definitions:
+ csv_writer.writerow([word, definitions, examples])
print(len(word_definitions))
+
+ # print(f"Successfully wrote {len(word_definitions)} words to word_definitions.csv")
+ # # Write to text file
+ # with open("word_definitions.txt", "w", encoding="utf-8") as textfile:
+ # # Write each word and its definitions to the text file
+ # for word, definitions in word_definitions:
+ # textfile.write(f"{word},{definitions}\n\n")