diff --git a/scripts/migrate_qs2_to_qs3.py b/scripts/migrate_qs2_to_qs3.py
new file mode 100644
index 0000000..ffa5d29
--- /dev/null
+++ b/scripts/migrate_qs2_to_qs3.py
@@ -0,0 +1,288 @@
+import re
+import argparse
+from pathlib import Path
+
+# Tags that will be kept as HTML as they don't have direct Markdown equivalents
# Tags that will be kept as HTML as they don't have direct Markdown equivalents.
# Maps each qsgen2 tag to the literal HTML emitted into the Markdown output.
# NOTE: the original tag strings were lost to markup stripping; reconstructed
# from the key names (#DV=div, #SPN=span, #TBL=table, ...).
HTML_PASSTHROUGH_TAGS = {
    "#DV": "<div>", "#EDV": "</div>",
    "#SPN": "<span>", "#ESPN": "</span>",
    "#TBL": "<table>", "#ETBL": "</table>",
    "#TR": "<tr>", "#ETR": "</tr>",
    "#TD": "<td>", "#ETD": "</td>",
    "#TH": "<th>", "#ETH": "</th>",
    "#ART": "<article>", "#EART": "</article>",
    "#SEC": "<section>", "#ESEC": "</section>",
    "#ASIDE": "<aside>",
    "#NAV": "<nav>",
    "#BTN": "<button>",
    "#SEL": "<select>",
    "#OPT": "<option>",
}
+
def sanitize_filename(name):
    """Turn an arbitrary string into a safe, URL-friendly filename stem."""
    slug = re.sub(r'\s+', '-', name.lower())    # whitespace runs -> one hyphen
    slug = re.sub(r'[^a-z0-9\-_.]', '', slug)   # keep only [a-z0-9-_.]
    slug = re.sub(r'-+', '-', slug)             # collapse repeated hyphens
    slug = slug.strip('-_')                     # no leading/trailing separators
    return slug or "untitled"
+
def convert_qstags_to_markdown(content, passthrough_tags=None):
    """Converts qstags in content to Markdown syntax.

    Args:
        content: Raw qsgen2 markup (tags like #H1, #BD, #link, #showimg ...).
        passthrough_tags: Optional qstag -> literal-HTML mapping for tags with
            no Markdown equivalent. Defaults to the module-level
            HTML_PASSTHROUGH_TAGS (backward compatible).

    Returns:
        The converted Markdown string, stripped of leading/trailing whitespace.
    """
    if passthrough_tags is None:
        passthrough_tags = HTML_PASSTHROUGH_TAGS

    # Start with a copy to modify
    md_content = content

    # Links: #link URL¤TEXT¤ -> [TEXT](URL)
    md_content = re.sub(r'#link\s+([^¤]+)¤([^¤]+)¤', r'[\2](\1)', md_content)

    # Headings: #H1...#EH1 -> # ..., etc.
    for i in range(6, 0, -1):
        # Capture content between #Hi and #EHi, case insensitive, DOTALL so
        # headings may span newlines. #EHi is optional at end of input.
        md_content = re.sub(r"#H{i}(.*?)(?:#EH{i}|$)".format(i=i),
                            r"{} \1".format("#" * i),
                            md_content, flags=re.IGNORECASE | re.DOTALL)
        # Clean up potential double newlines left by the DOTALL capture.
        md_content = re.sub(r"({} .*?)\n\n".format("#" * i), r"\1\n", md_content)

    # Blockquotes: #Q...#EQ -> each captured line prefixed with "> ".
    def replace_blockquote(match):
        inner_content = match.group(1).strip()
        lines = inner_content.split('\n')
        return '\n'.join([f"> {line}" for line in lines]) + '\n'
    md_content = re.sub(r"#Q(.*?)(?:#EQ|$)", replace_blockquote, md_content,
                        flags=re.IGNORECASE | re.DOTALL)

    # Ordered Lists: #OL ... #LI item #ELI ... #EOL -> "1. item" lines.
    def replace_ordered_list(match_ol):
        ol_content = match_ol.group(1).strip()
        list_item_texts = re.findall(
            r"^[ \t]*#LI[ \t]*(.*?)[ \t]*(?:#ELI)?\s*$",
            ol_content, flags=re.IGNORECASE | re.MULTILINE)
        processed_items = [f"{idx + 1}. {text.strip()}"
                           for idx, text in enumerate(list_item_texts)]
        return "\n".join(processed_items) + ("\n" if processed_items else "")

    md_content = re.sub(r"^[ \t]*#OL[ \t]*\n(.*?)\n^[ \t]*#EOL[ \t]*$",
                        replace_ordered_list, md_content,
                        flags=re.IGNORECASE | re.DOTALL | re.MULTILINE)

    # Unordered Lists: #UL ... #LI item #ELI ... #EUL -> "- item" lines.
    def replace_unordered_list(match_ul):
        ul_content = match_ul.group(1).strip()
        list_item_texts = re.findall(
            r"^[ \t]*#LI[ \t]*(.*?)[ \t]*(?:#ELI)?\s*$",
            ul_content, flags=re.IGNORECASE | re.MULTILINE)
        processed_items = [f"- {text.strip()}" for text in list_item_texts]
        return "\n".join(processed_items) + ("\n" if processed_items else "")

    md_content = re.sub(r"^[ \t]*#UL[ \t]*\n(.*?)\n^[ \t]*#EUL[ \t]*$",
                        replace_unordered_list, md_content,
                        flags=re.IGNORECASE | re.DOTALL | re.MULTILINE)

    # Remove any stray #ELI tags that weren't consumed by the #LI regex.
    md_content = re.sub(r"^[ \t]*#ELI[ \t]*$", "", md_content,
                        flags=re.IGNORECASE | re.MULTILINE)

    # Paragraphs: drop #P, turn #EP into a newline; Markdown separates
    # paragraphs with blank lines.
    md_content = re.sub(r"#P\s*", "", md_content, flags=re.IGNORECASE)
    md_content = re.sub(r"\s*#EP", "\n", md_content, flags=re.IGNORECASE)

    # Inline elements.
    md_content = re.sub(r"#BD(.*?)#EBD", r"**\1**", md_content, flags=re.IGNORECASE)
    md_content = re.sub(r"#STRONG(.*?)#ESTRONG", r"**\1**", md_content, flags=re.IGNORECASE)
    md_content = re.sub(r"#I(.*?)#EI", r"*\1*", md_content, flags=re.IGNORECASE)
    # NOTE(review): #SEM assumed to be the closing emphasis tag — confirm
    # against the qsgen2 tag reference.
    md_content = re.sub(r"#EM(.*?)#SEM", r"*\1*", md_content, flags=re.IGNORECASE)
    md_content = re.sub(r"#C(.*?)#EC", r"`\1`", md_content, flags=re.IGNORECASE)
    # Markdown has no underline; strip the tags and keep the text.
    md_content = re.sub(r"#UD(.*?)#EUD", r"\1", md_content, flags=re.IGNORECASE)

    # Images: #showimg IMAGE_PATH¤ALT_TEXT¤ -> ![ALT_TEXT](PROCESSED_PATH)
    def process_image_path_for_markdown(raw_path):
        # Absolute URLs and root-relative paths pass through; bare file
        # names are assumed to live under /images/.
        if raw_path.startswith(('http://', 'https://', '/')):
            return raw_path
        return f"/images/{raw_path}"

    def replace_showimg_to_markdown(match):
        raw_path = match.group(1)
        alt_text = match.group(2)
        processed_path = process_image_path_for_markdown(raw_path)
        return f"![{alt_text}]({processed_path})"
    md_content = re.sub(r'#showimg\s+([^¤]+)¤([^¤]+)¤',
                        replace_showimg_to_markdown, md_content)

    # Linked Images: #linkimg IMAGE_PATH¤ALT_TEXT¤ ->
    # [![ALT_TEXT](PATH)](PATH)  — an image linking to itself.
    def replace_linkimg_to_markdown(match):
        raw_path = match.group(1)
        alt_text = match.group(2)
        processed_path = process_image_path_for_markdown(raw_path)  # Reuse path processor
        return f"[![{alt_text}]({processed_path})]({processed_path})"
    md_content = re.sub(r'#linkimg\s+([^¤]+)¤([^¤]+)¤',
                        replace_linkimg_to_markdown, md_content)

    # YouTube Videos: #ytvideo YOUTUBE_ID -> HTML iframe embed.
    def replace_ytvideo_to_html(match):
        video_id = match.group(1)
        return (f'<iframe width="560" height="315" '
                f'src="https://www.youtube.com/embed/{video_id}" '
                f'frameborder="0" allowfullscreen></iframe>')
    md_content = re.sub(r'#ytvideo\s+([A-Za-z0-9_\-]+)',
                        replace_ytvideo_to_html, md_content)

    # Line break: #BR -> two spaces + newline (Markdown hard break).
    md_content = md_content.replace("#BR", "  \n")

    # HTML entities (these are fine as is, Markdown supports them).
    md_content = md_content.replace("#LT", "<")
    md_content = md_content.replace("#GT", ">")
    md_content = md_content.replace("#NUM", "#")

    # Passthrough HTML for tags without direct Markdown equivalents.
    for qstag, html_tag in passthrough_tags.items():
        md_content = md_content.replace(qstag, html_tag)

    # Final cleanup:
    # Normalize runs of blank lines to a single blank line (standard Markdown
    # paragraph separation), then trim the ends.
    md_content = re.sub(r"\n\s*\n", "\n\n", md_content)
    md_content = md_content.strip()

    return md_content
+
def process_blog_file(file_path, output_dir_base):
    """Processes a .blog file and creates a new Markdown file.

    Reads qsgen2 blog markup from file_path, extracts metadata (title from the
    BLOG_TITLE line, date from the YYYYMMDD-N.blog filename), converts the
    #INGRESS/#BODY sections to Markdown, and writes the result to
    <output_dir_base>/blog/<year>/<month>/<day>/<slug>.md.
    """
    print(f"Processing blog file: {file_path}")
    # Read explicitly as UTF-8 so results don't depend on the platform's
    # default locale encoding.
    content_lines = file_path.read_text(encoding="utf-8").splitlines()

    metadata = {
        "title": "Untitled Post",
        "date": "",
        "layout": "post",
        "author": "Anonymous"
    }
    body_content = []

    # Extract date from filename (e.g., 20250530-3.blog)
    match_date_filename = re.match(r'(\d{8})-\d+\.blog', file_path.name)
    if match_date_filename:
        date_str = match_date_filename.group(1)
        metadata['date'] = f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:8]}"
    else:
        print(f" [WARN] Could not parse date from filename: {file_path.name}. Skipping date.")

    parsing_ingress = False
    parsing_body = False

    for line in content_lines:
        if line.startswith("DATE "):
            # DATE field in file is secondary to filename date for posts
            pass
        elif line.startswith("BLOG_TITLE "):
            metadata['title'] = line.replace("BLOG_TITLE ", "", 1).strip()
        elif line.strip() == "#INGRESS_START":
            parsing_ingress = True
            continue
        elif line.strip() == "#INGRESS_STOP":
            parsing_ingress = False
            continue
        elif line.strip() == "#BODY_START":
            parsing_body = True
            continue
        elif line.strip() == "#BODY_STOP":
            parsing_body = False
            continue

        # Keep everything between the START/STOP markers as post content.
        if parsing_ingress or parsing_body:
            body_content.append(line)

    markdown_body = convert_qstags_to_markdown("\n".join(body_content))

    escaped_title = metadata['title'].replace('"', '\\"')  # Escape for YAML double-quoted scalar
    frontmatter = [
        "---",
        f'title: "{escaped_title}"',
        f"date: {metadata['date']}",
        f"layout: {metadata['layout']}",
        f"author: {metadata['author']}",
        "---",
        ""
    ]

    output_content = "\n".join(frontmatter) + markdown_body

    sanitized_title = sanitize_filename(metadata['title'])
    if not metadata['date']:
        # Fallback if date couldn't be parsed, though unlikely for .blog files
        output_subdir = Path(output_dir_base) / "blog" / "unknown_date"
    else:
        year, month, day = metadata['date'].split('-')
        output_subdir = Path(output_dir_base) / "blog" / year / month / day

    output_subdir.mkdir(parents=True, exist_ok=True)
    output_file_path = output_subdir / f"{sanitized_title}.md"

    # Write as UTF-8 for the same platform-independence reason as the read.
    output_file_path.write_text(output_content, encoding="utf-8")
    print(f" -> Created: {output_file_path}")
+
def process_qst_file(file_path, output_dir_base):
    """Processes a .qst file and creates a new Markdown file.

    The first line may carry the page title as "#title=...". The remaining
    lines are qsgen2 markup, converted to Markdown and written to
    <output_dir_base>/<slug>.md.
    """
    print(f"Processing page file: {file_path}")
    # Read explicitly as UTF-8 so results don't depend on the platform's
    # default locale encoding.
    content_lines = file_path.read_text(encoding="utf-8").splitlines()

    metadata = {
        "title": "Untitled Page",
        "layout": "page",
        "author": "Anonymous"  # Added for consistency
    }
    body_content_lines = []

    if content_lines and content_lines[0].startswith("#title="):
        metadata['title'] = content_lines[0].replace("#title=", "", 1).strip()
        body_content_lines = content_lines[1:]
    else:
        # No explicit title: fall back to the file name (without extension).
        print(f" [WARN] No #title= found in {file_path.name}. Using filename as title.")
        metadata['title'] = file_path.stem
        body_content_lines = content_lines

    markdown_body = convert_qstags_to_markdown("\n".join(body_content_lines))

    escaped_title = metadata['title'].replace('"', '\\"')  # Escape for YAML double-quoted scalar
    frontmatter = [
        "---",
        f'title: "{escaped_title}"',
        f"layout: {metadata['layout']}",
        f"author: {metadata['author']}",
        "---",
        ""
    ]

    output_content = "\n".join(frontmatter) + markdown_body

    sanitized_title = sanitize_filename(metadata['title'])
    # Pages go into the root of the output_dir_base (e.g. content/)
    output_file_path = Path(output_dir_base) / f"{sanitized_title}.md"

    # Write as UTF-8 for the same platform-independence reason as the read.
    output_file_path.write_text(output_content, encoding="utf-8")
    print(f" -> Created: {output_file_path}")
+
def main():
    """CLI entry point: walk --source-dir and migrate each qsgen2 file."""
    parser = argparse.ArgumentParser(description="Migrate qsgen2 (.blog, .qst) files to qsgen3 Markdown format.")
    parser.add_argument("--source-dir", required=True, help="Directory containing old .blog and .qst files.")
    parser.add_argument("--output-dir", required=True, help="Directory to save new Markdown files (e.g., your qsgen3 'content' directory).")
    args = parser.parse_args()

    source_path = Path(args.source_dir)
    output_path = Path(args.output_dir)

    # Bail out early when the source directory is unusable.
    if not source_path.is_dir():
        print(f"Error: Source directory '{source_path}' not found or not a directory.")
        return

    output_path.mkdir(parents=True, exist_ok=True)
    print(f"Source directory: {source_path.resolve()}")
    print(f"Output directory: {output_path.resolve()}")

    # Dispatch by extension; rglob recurses into subdirectories as well.
    handlers = {".blog": process_blog_file, ".qst": process_qst_file}
    for item in source_path.rglob('*'):
        if not item.is_file():
            continue
        for suffix, handler in handlers.items():
            if item.name.endswith(suffix):
                handler(item, output_path)
                break

    print("\nMigration complete.")


if __name__ == "__main__":
    main()