feat: implementation finished
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,3 +1,5 @@
|
|||||||
|
words/*
|
||||||
|
|
||||||
# ---> Python
|
# ---> Python
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
@@ -167,4 +169,3 @@ cython_debug/
|
|||||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
#.idea/
|
#.idea/
|
||||||
|
|
||||||
|
|||||||
74
run.py
Normal file
74
run.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import csv
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
def remove_links(definitions: str) -> str:
    """Replace wiki-style ``[[...]]`` links with their display text.

    ``[[target]]`` becomes ``target`` and ``[[target|alias]]`` becomes
    ``alias`` (the text after the last pipe). Anything that is not a
    well-formed link is left untouched.

    Args:
        definitions: Text that may contain wiki-style links.

    Returns:
        The text with every well-formed link replaced by its display text.
    """
    # Plain links first: [[target]] -> target.
    definitions = re.sub(r"\[\[([^|\]]*)\]\]", r"\1", definitions)
    # Aliased links: [[target|alias]] -> alias. The target part may contain
    # neither "[" nor "]", so a match cannot begin at an unterminated link
    # and run on to a later "|...]]", swallowing the text in between.
    definitions = re.sub(r"\[\[[^\[\]]*\|([^|\]]*)\]\]", r"\1", definitions)
    return definitions
|
||||||
|
|
||||||
|
|
||||||
|
def read_markdown_file(file_path: str, filename: str) -> tuple[str, str] | None:
|
||||||
|
try:
|
||||||
|
with open(file_path, "r", encoding="utf-8") as f:
|
||||||
|
content = f.read()
|
||||||
|
|
||||||
|
word = os.path.splitext(filename)[0]
|
||||||
|
|
||||||
|
content_parts = content.split("??")
|
||||||
|
if len(content_parts) > 1:
|
||||||
|
definitions = content_parts[1]
|
||||||
|
else:
|
||||||
|
raise Exception("No delimiter found")
|
||||||
|
|
||||||
|
filtered_lines = []
|
||||||
|
for line in definitions.split("\n"):
|
||||||
|
line = line.strip()
|
||||||
|
if line.startswith(("n.", "adj.", "adv.", "v.")):
|
||||||
|
filtered_lines.append(line)
|
||||||
|
|
||||||
|
definitions = "\n".join(filtered_lines)
|
||||||
|
definitions = remove_links(definitions)
|
||||||
|
return word, definitions
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error reading {file_path}: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def walk_directory(directory: str = ".") -> list[tuple[str, str]]:
    """Collect word definitions from every ``.md`` file under a directory.

    The directory tree is walked recursively. Each file whose name ends with
    ``.md`` is passed to ``read_markdown_file``; files for which that returns
    ``None`` are skipped.

    Args:
        directory: The directory to start searching from. Defaults to the
            current directory.

    Returns:
        A list of ``(word, definitions)`` tuples, in a deterministic
        traversal order (directories and files visited in sorted name order).
    """
    word_definitions = []
    for root, dirs, files in os.walk(directory):
        # Sorting in place fixes the order in which os.walk descends, so the
        # result does not depend on the file system's listing order.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(".md"):
                continue

            result = read_markdown_file(os.path.join(root, file), file)
            if result is None:
                continue

            word_definitions.append(result)

    return word_definitions
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Gather every entry under words/PartB, ordered alphabetically by word.
    word_definitions = sorted(
        walk_directory("words/PartB"), key=lambda entry: entry[0]
    )

    # Emit one "word,definitions" record per entry, each followed by a
    # blank line.
    with open("word_definitions.txt", "w", encoding="utf-8") as textfile:
        textfile.writelines(
            f"{word},{definitions}\n\n" for word, definitions in word_definitions
        )

    # Report how many entries were written.
    print(len(word_definitions))
|
||||||
Reference in New Issue
Block a user