diff --git a/.gitignore b/.gitignore
index 89762fd..36f0fdf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,7 +22,6 @@ include/qsgen2/lang/*.en
 output/
 
 # Scripts directory (temporary/conversion scripts)
-/scripts/
 /tools/
 
 # Build output
diff --git a/bin/qsgen3 b/bin/qsgen3
index 73efa1c..69ad584 100755
--- a/bin/qsgen3
+++ b/bin/qsgen3
@@ -17,6 +17,7 @@ umask 0022
 # --- Configuration ---
 # Associative array to hold configuration values
 typeset -A QSG_CONFIG
+typeset -a SITEMAP_URLS=() # Array to store URLs for sitemap
 
 # --- Script Paths ---
 
@@ -282,6 +283,64 @@ _check_dependencies() {
   _log INFO "All critical dependencies found."
 }
 
+# --- Sitemap Generation ---
+# Write sitemap.xml (sitemaps.org 0.9 protocol) into the output directory,
+# one <url> entry per path collected in SITEMAP_URLS.
+# Globals: QSG_CONFIG (read), SITEMAP_URLS (read); logs through _log.
+# Returns: 0 when written or skipped; 1 if site_url is unset or the write fails.
+_generate_sitemap() {
+  _log DEBUG "Entered _generate_sitemap"
+  if [[ "${QSG_CONFIG[build_options_generate_sitemap]}" != "true" ]]; then
+    _log INFO "Sitemap generation is disabled in configuration. Skipping."
+    return 0
+  fi
+
+  if [[ -z "${QSG_CONFIG[site_url]}" ]]; then
+    _log WARNING "'site_url' is not set in configuration. Cannot generate sitemap."
+    return 1 # Indicate an issue
+  fi
+
+  if [[ ${#SITEMAP_URLS[@]} -eq 0 ]]; then
+    _log INFO "No URLs collected for sitemap. Skipping sitemap generation."
+    return 0
+  fi
+
+  _log INFO "Generating sitemap..."
+  local sitemap_file="${QSG_CONFIG[paths_output_dir]}/sitemap.xml"
+  # Drop one trailing slash so "${site_url}/${rel_url}" never contains "//".
+  local site_url="${QSG_CONFIG[site_url]%/}"
+
+  local last_mod_date
+  last_mod_date=$(date +%Y-%m-%d) # Build date doubles as <lastmod> for every entry
+
+  # Emit the whole document from one subshell so a single redirection writes it.
+  (
+    printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>'
+    printf '%s\n' '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
+    local rel_url loc
+    for rel_url in "${SITEMAP_URLS[@]}"; do
+      # Collected URLs are relative (e.g. posts/file.html, index.html).
+      # <loc> is XML text, so entity-escape it; '&' must be replaced first.
+      loc=$(printf '%s' "${site_url}/${rel_url}" |
+        sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' \
+            -e 's/"/\&quot;/g' -e "s/'/\&apos;/g")
+      printf '%s\n' '  <url>'
+      printf '%s\n' "    <loc>${loc}</loc>"
+      printf '%s\n' "    <lastmod>${last_mod_date}</lastmod>"
+      # Optional: <changefreq> and <priority>, e.g. "weekly" and "0.8".
+      printf '%s\n' '  </url>'
+    done
+    printf '%s\n' '</urlset>'
+  ) > "$sitemap_file"
+
+  if [[ $? -ne 0 ]]; then
+    _log ERROR "Failed to write sitemap to $sitemap_file"
+    return 1 # Indicate an issue
+  fi
+  _log SUCCESS "Sitemap generated successfully: $sitemap_file"
+  return 0
+}
+
 # --- Core Functions ---
 _clean_output_dir() {
   _log INFO "Cleaning output directory: ${QSG_CONFIG[paths_output_dir]}"
@@ -632,25 +691,54 @@ _process_markdown_files() {
     if [[ -z "$source_file" ]]; then continue; fi
     _log DEBUG "Processing Markdown file: $source_file"
 
-    local relative_path="${source_file#$content_dir/}"
+    local relative_path_from_content_root="${source_file#$content_dir/}"
     if [[ "$content_dir" == "$source_file" ]]; then
-      relative_path=$(basename "$source_file")
+      relative_path_from_content_root=$(basename "$source_file")
     elif [[ "$content_dir" == "/" && "$source_file" == /* ]]; then
-      relative_path="${source_file#/}"
+      relative_path_from_content_root="${source_file#/}"
     elif [[ "$content_dir" == "." && "$source_file" == ./* ]]; then
-      relative_path="${source_file#./}"
+      relative_path_from_content_root="${source_file#./}"
     fi
 
-    local output_file_html_part="${relative_path%.md}.html"
-    local output_file_abs="$output_dir/$output_file_html_part"
+    local dir_part=""
+    local filename_md_part=""
 
-    mkdir -p "$(dirname "$output_file_abs")"
+    if [[ "$relative_path_from_content_root" == */* ]]; then
+      dir_part="${relative_path_from_content_root%/*}/"
+      filename_md_part="${relative_path_from_content_root##*/}"
+    else
+      dir_part=""
+      filename_md_part="$relative_path_from_content_root"
+    fi
+
+    local filename_base_orig="${filename_md_part%.md}"
+
+    # Sanitize filename_base_orig
+    local sanitized_filename_base="${filename_base_orig:l}" # Lowercase
+    sanitized_filename_base=${sanitized_filename_base//[[:space:],;\']/'-'} # Replace spaces, commas, semicolons, apostrophes
+
+    # Consolidate multiple hyphens into one
+    sanitized_filename_base=$(echo "$sanitized_filename_base" | tr -s '-')
+
+    # Remove leading hyphens
+    while [[ "$sanitized_filename_base" == -* ]]; do
+      sanitized_filename_base="${sanitized_filename_base#-}"
+    done
+    # Remove trailing hyphens
+    while [[ "$sanitized_filename_base" == *- ]]; do
+      sanitized_filename_base="${sanitized_filename_base%-}"
+    done
+
+    if [[ -z "$sanitized_filename_base" ]]; then
+      _log WARNING "Original filename base '$filename_base_orig' from '$source_file' resulted in empty sanitized name. Using 'untitled' as fallback."
+      sanitized_filename_base="untitled"
+    fi
 
     local frontmatter=$(sed -n '/^---$/,/^---$/{/^---$/d;p;}' "$source_file")
-    local title=$(echo "$frontmatter" | grep -m1 -iE '^title:' | sed -E 's/^title:[[:space:]]*//i; s/^["\x27](.*)["\x27]$/\1/')
-    local date=$(echo "$frontmatter" | grep -m1 -iE '^date:' | sed -E 's/^date:[[:space:]]*//i; s/^["\x27](.*)["\x27]$/\1/')
+    local title=$(echo "$frontmatter" | grep -m1 -iE '^title:' | sed -E 's/^title:[[:space:]]*//i; s/^[\"\x27](.*)[\"\x27]$/\1/')
+    local date=$(echo "$frontmatter" | grep -m1 -iE '^date:' | sed -E 's/^date:[[:space:]]*//i; s/^[\"\x27](.*)[\"\x27]$/\1/')
     local draft=$(echo "$frontmatter" | grep -m1 -iE '^draft:' | sed -E 's/^draft:[[:space:]]*//i' | tr '[:upper:]' '[:lower:]')
-    local custom_layout_fm=$(echo "$frontmatter" | grep -m1 -iE '^layout:' | sed -E 's/^layout:[[:space:]]*//i; s/^["\x27](.*)["\x27]$/\1/')
+    local custom_layout_fm=$(echo "$frontmatter" | grep -m1 -iE '^layout:' | sed -E 's/^layout:[[:space:]]*//i; s/^[\"\x27](.*)[\"\x27]$/\1/')
 
     if [[ "$draft" == "true" && "${QSG_CONFIG[build_options_process_drafts]}" != "true" ]]; then
       _log DEBUG "Skipping draft file: $source_file"
@@ -658,11 +746,49 @@ _process_markdown_files() {
     fi
 
     if [[ -z "$title" ]]; then
-      _log WARNING "Markdown file '$source_file' is missing a title. Using filename as fallback."
-      local fn_no_ext=$(basename "$source_file")
-      title="${fn_no_ext%.md}"
+      _log WARNING "Markdown file '$source_file' is missing a title. Using filename_base_orig ('$filename_base_orig') as fallback."
+      title="$filename_base_orig"
     fi
 
+    # Determine output URL directory structure
+    local output_url_dir_part=""
+    # The variable 'relative_path_from_content_root' holds the path relative to 'content_dir' (e.g., posts/my-post.md)
+    # The variable 'date' is extracted from frontmatter earlier
+    # The variable 'dir_part' holds the original source directory (e.g., "posts/" or "pages/")
+
+    if [[ "$relative_path_from_content_root" == posts/* ]]; then # Check if it's a blog post from content/posts/
+      if [[ -n "$date" ]]; then # Date was extracted earlier from frontmatter
+        # Assuming date is YYYY-MM-DD
+        local parsed_year=$(echo "$date" | awk -F'-' '{print $1}')
+        local parsed_month=$(echo "$date" | awk -F'-' '{print $2}')
+        local parsed_day=$(echo "$date" | awk -F'-' '{print $3}')
+
+        local valid_date_parts=true
+        if ! [[ "$parsed_year" =~ ^[0-9]{4}$ ]]; then valid_date_parts=false; fi
+        if ! [[ "$parsed_month" =~ ^(0[1-9]|1[0-2])$ ]]; then valid_date_parts=false; fi # Validates MM is 01-12
+        if ! [[ "$parsed_day" =~ ^(0[1-9]|[12][0-9]|3[01])$ ]]; then valid_date_parts=false; fi # Validates DD is 01-31
+
+        if [[ "$valid_date_parts" == true ]]; then
+          output_url_dir_part="blog/$parsed_year/$parsed_month/$parsed_day/"
+        else
+          _log WARNING "Blog post '$source_file' (date: '$date') has invalid date components. Required: YYYY-MM-DD. URL will be 'blog/${sanitized_filename_base}.html'."
+          output_url_dir_part="blog/"
+        fi
+      else
+        _log WARNING "Blog post '$source_file' is missing a date. URL will be 'blog/${sanitized_filename_base}.html'."
+        output_url_dir_part="blog/"
+      fi
+    else # It's a regular page, not a blog post from content/posts/
+      output_url_dir_part="$dir_part" # Use its original directory structure (e.g., "pages/" or "" for root files)
+    fi
+
+    local output_file_html_part="${output_url_dir_part}${sanitized_filename_base}.html"
+    local output_file_abs="$output_dir/$output_file_html_part"
+
+    _log DEBUG "Source: $source_file -> Output URL Path: $output_file_html_part (Original base: '$filename_base_orig', Sanitized: '$sanitized_filename_base')"
+
+    mkdir -p "$(dirname "$output_file_abs")"
+
     local template_to_use=""
     if [[ -n "$custom_layout_fm" ]]; then
       if [[ "$custom_layout_fm" != *.html && "$custom_layout_fm" != *.xml ]]; then custom_layout_fm+=".html"; fi
@@ -674,7 +800,7 @@ _process_markdown_files() {
     fi
 
     if [[ -z "$template_to_use" ]]; then
-      if [[ "$relative_path" == posts/* && -f "$default_post_template" ]]; then
+      if [[ "$relative_path_from_content_root" == posts/* && -f "$default_post_template" ]]; then
         template_to_use="$default_post_template"
       elif [[ -f "$default_page_template" ]]; then
         template_to_use="$default_page_template"
@@ -718,13 +844,25 @@ _process_markdown_files() {
     else
       _log DEBUG "No CSS specified for post $source_file."
     fi
-    
-    _log INFO "Generating $output_file_abs from $source_file using template $template_to_use"
-    if ! "${pandoc_cmd[@]}"; then
-      _log ERROR "Pandoc failed for $source_file. Command was: ${pandoc_cmd[*]}"
+
+    _log DEBUG "Executing Pandoc for $source_file. Command constructed with individual arguments (see array definition)."
+    "${pandoc_cmd[@]}"
+    local pandoc_exit_code=$?
+
+    if [[ $pandoc_exit_code -eq 0 ]]; then
+      _log DEBUG "Successfully processed $source_file to $output_file_abs"
+      # Add to sitemap URLs
+      if [[ "${QSG_CONFIG[build_options_generate_sitemap]}" == "true" ]]; then
+        SITEMAP_URLS+=("$output_file_html_part")
+        _log DEBUG "Added to sitemap URLs: $output_file_html_part"
+      fi
     else
-      _log DEBUG "Successfully generated $output_file_abs"
+      _log ERROR "Pandoc failed for $source_file (exit code: $pandoc_exit_code). Output: $output_file_abs"
+      # Optionally, decide if one failure should stop all, or just skip this file
+      # For now, we continue processing other files
+      continue
     fi
+
   done < <(find "$content_dir" -type f -name '*.md' -print0 | xargs -0 -r realpath)
 
   _log INFO "Finished processing Markdown files."
@@ -1094,6 +1232,9 @@ main() {
   _generate_rss_feed
   if [[ $? -ne 0 ]]; then _log ERROR "RSS feed generation failed."; exit 1; fi
 
+  _generate_sitemap
+  if [[ $? -ne 0 ]]; then _log WARNING "Sitemap generation encountered issues."; fi # Non-fatal, just a warning
+
   _log INFO "Final state of output directory (${QSG_CONFIG[paths_output_dir]}): $(ls -R "${QSG_CONFIG[paths_output_dir]}" 2>&1)"
 
   _log SUCCESS "Site generation complete! Output: ${QSG_CONFIG[paths_output_dir]}"