feat(qsgen3): Enhance output naming, URL structure, and sitemap
Implements several improvements to the qsgen3 site generator:

- **Filename Sanitization**: Introduces sanitization for output HTML filenames: names are lowercased; whitespace, commas, semicolons, and apostrophes are replaced with hyphens; repeated hyphens are collapsed; and leading/trailing hyphens are trimmed. If nothing remains, the name falls back to `untitled`.
- **Blog Post URL Structure**: Blog posts from content/posts/ are now generated with a blog/YYYY/MM/DD/ URL structure if a valid YYYY-MM-DD date is present in the frontmatter. Falls back to blog/<filename>.html when the date is missing or invalid.
- **Improved Title Fallback**: Uses the original filename (before sanitization) as a fallback title if no title is specified in the frontmatter.
- **Enhanced Pandoc Error Handling**: Better logging for Pandoc execution and an explicit check of its exit code, allowing the script to continue when a single file fails.
- **Sitemap Integration**: Adds successfully generated pages to the sitemap URL list.
- **Non-fatal Sitemap Generation**: Sitemap generation failures are now logged as warnings and do not halt the script.

Also stops ignoring the /scripts/ directory in .gitignore so that migration scripts are included in the repository.
This commit is contained in:
parent
1283eb30cb
commit
4b95426256
1
.gitignore
vendored
1
.gitignore
vendored
@ -22,7 +22,6 @@ include/qsgen2/lang/*.en
|
||||
output/
|
||||
|
||||
# Scripts directory (temporary/conversion scripts)
|
||||
/scripts/
|
||||
/tools/
|
||||
|
||||
# Build output
|
||||
|
179
bin/qsgen3
179
bin/qsgen3
@ -17,6 +17,7 @@ umask 0022
|
||||
# --- Configuration ---
|
||||
# Associative array to hold configuration values
|
||||
typeset -A QSG_CONFIG
|
||||
typeset -a SITEMAP_URLS=() # Array to store URLs for sitemap
|
||||
|
||||
|
||||
# --- Script Paths ---
|
||||
@ -282,6 +283,64 @@ _check_dependencies() {
|
||||
_log INFO "All critical dependencies found."
|
||||
}
|
||||
|
||||
# --- Sitemap Generation ---
|
||||
_generate_sitemap() {
|
||||
_log DEBUG "Entered _generate_sitemap"
|
||||
if [[ "${QSG_CONFIG[build_options_generate_sitemap]}" != "true" ]]; then
|
||||
_log INFO "Sitemap generation is disabled in configuration. Skipping."
|
||||
return 0
|
||||
fi
|
||||
|
||||
if [[ -z "${QSG_CONFIG[site_url]}" ]]; then
|
||||
_log WARNING "'site_url' is not set in configuration. Cannot generate sitemap."
|
||||
return 1 # Indicate an issue
|
||||
fi
|
||||
|
||||
if [[ ${#SITEMAP_URLS[@]} -eq 0 ]]; then
|
||||
_log INFO "No URLs collected for sitemap. Skipping sitemap generation."
|
||||
return 0
|
||||
fi
|
||||
|
||||
_log INFO "Generating sitemap..."
|
||||
local sitemap_file="${QSG_CONFIG[paths_output_dir]}/sitemap.xml"
|
||||
local site_url="${QSG_CONFIG[site_url]}"
|
||||
# Ensure site_url does not end with a slash for clean concatenation
|
||||
site_url="${site_url%/}"
|
||||
|
||||
local last_mod_date
|
||||
last_mod_date=$(date +%Y-%m-%d) # Current date as last modified
|
||||
|
||||
# Start XML structure
|
||||
# Using a subshell for cleaner output redirection to the file
|
||||
(
|
||||
echo '<?xml version="1.0" encoding="UTF-8"?>'
|
||||
echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
|
||||
|
||||
local rel_url
|
||||
for rel_url in "${SITEMAP_URLS[@]}"; do
|
||||
# Ensure rel_url doesn't start with a slash if site_url already provides it
|
||||
# However, our collected URLs (e.g., posts/file.html, index.html) are fine.
|
||||
echo ' <url>'
|
||||
echo " <loc>${site_url}/${rel_url}</loc>"
|
||||
echo " <lastmod>${last_mod_date}</lastmod>"
|
||||
# Optional: <changefreq> and <priority>
|
||||
# echo " <changefreq>weekly</changefreq>"
|
||||
# echo " <priority>0.8</priority>"
|
||||
echo ' </url>'
|
||||
done
|
||||
|
||||
echo '</urlset>'
|
||||
) > "$sitemap_file"
|
||||
|
||||
if [[ $? -eq 0 ]]; then
|
||||
_log SUCCESS "Sitemap generated successfully: $sitemap_file"
|
||||
else
|
||||
_log ERROR "Failed to write sitemap to $sitemap_file"
|
||||
return 1 # Indicate an issue
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
|
||||
# --- Core Functions ---
|
||||
_clean_output_dir() {
|
||||
_log INFO "Cleaning output directory: ${QSG_CONFIG[paths_output_dir]}"
|
||||
@ -632,25 +691,54 @@ _process_markdown_files() {
|
||||
if [[ -z "$source_file" ]]; then continue; fi
|
||||
_log DEBUG "Processing Markdown file: $source_file"
|
||||
|
||||
local relative_path="${source_file#$content_dir/}"
|
||||
local relative_path_from_content_root="${source_file#$content_dir/}"
|
||||
if [[ "$content_dir" == "$source_file" ]]; then
|
||||
relative_path=$(basename "$source_file")
|
||||
relative_path_from_content_root=$(basename "$source_file")
|
||||
elif [[ "$content_dir" == "/" && "$source_file" == /* ]]; then
|
||||
relative_path="${source_file#/}"
|
||||
relative_path_from_content_root="${source_file#/}"
|
||||
elif [[ "$content_dir" == "." && "$source_file" == ./* ]]; then
|
||||
relative_path="${source_file#./}"
|
||||
relative_path_from_content_root="${source_file#./}"
|
||||
fi
|
||||
|
||||
local output_file_html_part="${relative_path%.md}.html"
|
||||
local output_file_abs="$output_dir/$output_file_html_part"
|
||||
local dir_part=""
|
||||
local filename_md_part=""
|
||||
|
||||
mkdir -p "$(dirname "$output_file_abs")"
|
||||
if [[ "$relative_path_from_content_root" == */* ]]; then
|
||||
dir_part="${relative_path_from_content_root%/*}/"
|
||||
filename_md_part="${relative_path_from_content_root##*/}"
|
||||
else
|
||||
dir_part=""
|
||||
filename_md_part="$relative_path_from_content_root"
|
||||
fi
|
||||
|
||||
local filename_base_orig="${filename_md_part%.md}"
|
||||
|
||||
# Sanitize filename_base_orig
|
||||
local sanitized_filename_base="${filename_base_orig:l}" # Lowercase
|
||||
sanitized_filename_base=${sanitized_filename_base//[[:space:],;\']/'-'} # Replace spaces, commas, semicolons, apostrophes
|
||||
|
||||
# Consolidate multiple hyphens into one
|
||||
sanitized_filename_base=$(echo "$sanitized_filename_base" | tr -s '-')
|
||||
|
||||
# Remove leading hyphens
|
||||
while [[ "$sanitized_filename_base" == -* ]]; do
|
||||
sanitized_filename_base="${sanitized_filename_base#-}"
|
||||
done
|
||||
# Remove trailing hyphens
|
||||
while [[ "$sanitized_filename_base" == *- ]]; do
|
||||
sanitized_filename_base="${sanitized_filename_base%-}"
|
||||
done
|
||||
|
||||
if [[ -z "$sanitized_filename_base" ]]; then
|
||||
_log WARNING "Original filename base '$filename_base_orig' from '$source_file' resulted in empty sanitized name. Using 'untitled' as fallback."
|
||||
sanitized_filename_base="untitled"
|
||||
fi
|
||||
|
||||
local frontmatter=$(sed -n '/^---$/,/^---$/{/^---$/d;p;}' "$source_file")
|
||||
local title=$(echo "$frontmatter" | grep -m1 -iE '^title:' | sed -E 's/^title:[[:space:]]*//i; s/^["\x27](.*)["\x27]$/\1/')
|
||||
local date=$(echo "$frontmatter" | grep -m1 -iE '^date:' | sed -E 's/^date:[[:space:]]*//i; s/^["\x27](.*)["\x27]$/\1/')
|
||||
local title=$(echo "$frontmatter" | grep -m1 -iE '^title:' | sed -E 's/^title:[[:space:]]*//i; s/^[\"\x27](.*)[\"\x27]$/\1/')
|
||||
local date=$(echo "$frontmatter" | grep -m1 -iE '^date:' | sed -E 's/^date:[[:space:]]*//i; s/^[\"\x27](.*)[\"\x27]$/\1/')
|
||||
local draft=$(echo "$frontmatter" | grep -m1 -iE '^draft:' | sed -E 's/^draft:[[:space:]]*//i' | tr '[:upper:]' '[:lower:]')
|
||||
local custom_layout_fm=$(echo "$frontmatter" | grep -m1 -iE '^layout:' | sed -E 's/^layout:[[:space:]]*//i; s/^["\x27](.*)["\x27]$/\1/')
|
||||
local custom_layout_fm=$(echo "$frontmatter" | grep -m1 -iE '^layout:' | sed -E 's/^layout:[[:space:]]*//i; s/^[\"\x27](.*)[\"\x27]$/\1/')
|
||||
|
||||
if [[ "$draft" == "true" && "${QSG_CONFIG[build_options_process_drafts]}" != "true" ]]; then
|
||||
_log DEBUG "Skipping draft file: $source_file"
|
||||
@ -658,11 +746,49 @@ _process_markdown_files() {
|
||||
fi
|
||||
|
||||
if [[ -z "$title" ]]; then
|
||||
_log WARNING "Markdown file '$source_file' is missing a title. Using filename as fallback."
|
||||
local fn_no_ext=$(basename "$source_file")
|
||||
title="${fn_no_ext%.md}"
|
||||
_log WARNING "Markdown file '$source_file' is missing a title. Using filename_base_orig ('$filename_base_orig') as fallback."
|
||||
title="$filename_base_orig"
|
||||
fi
|
||||
|
||||
# Determine output URL directory structure
|
||||
local output_url_dir_part=""
|
||||
# The variable 'relative_path_from_content_root' holds the path relative to 'content_dir' (e.g., posts/my-post.md)
|
||||
# The variable 'date' is extracted from frontmatter earlier
|
||||
# The variable 'dir_part' holds the original source directory (e.g., "posts/" or "pages/")
|
||||
|
||||
if [[ "$relative_path_from_content_root" == posts/* ]]; then # Check if it's a blog post from content/posts/
|
||||
if [[ -n "$date" ]]; then # Date was extracted earlier from frontmatter
|
||||
# Assuming date is YYYY-MM-DD
|
||||
local parsed_year=$(echo "$date" | awk -F'-' '{print $1}')
|
||||
local parsed_month=$(echo "$date" | awk -F'-' '{print $2}')
|
||||
local parsed_day=$(echo "$date" | awk -F'-' '{print $3}')
|
||||
|
||||
local valid_date_parts=true
|
||||
if ! [[ "$parsed_year" =~ ^[0-9]{4}$ ]]; then valid_date_parts=false; fi
|
||||
if ! [[ "$parsed_month" =~ ^(0[1-9]|1[0-2])$ ]]; then valid_date_parts=false; fi # Validates MM is 01-12
|
||||
if ! [[ "$parsed_day" =~ ^(0[1-9]|[12][0-9]|3[01])$ ]]; then valid_date_parts=false; fi # Validates DD is 01-31
|
||||
|
||||
if [[ "$valid_date_parts" == true ]]; then
|
||||
output_url_dir_part="blog/$parsed_year/$parsed_month/$parsed_day/"
|
||||
else
|
||||
_log WARNING "Blog post '$source_file' (date: '$date') has invalid date components. Required: YYYY-MM-DD. URL will be 'blog/${sanitized_filename_base}.html'."
|
||||
output_url_dir_part="blog/"
|
||||
fi
|
||||
else
|
||||
_log WARNING "Blog post '$source_file' is missing a date. URL will be 'blog/${sanitized_filename_base}.html'."
|
||||
output_url_dir_part="blog/"
|
||||
fi
|
||||
else # It's a regular page, not a blog post from content/posts/
|
||||
output_url_dir_part="$dir_part" # Use its original directory structure (e.g., "pages/" or "" for root files)
|
||||
fi
|
||||
|
||||
local output_file_html_part="${output_url_dir_part}${sanitized_filename_base}.html"
|
||||
local output_file_abs="$output_dir/$output_file_html_part"
|
||||
|
||||
_log DEBUG "Source: $source_file -> Output URL Path: $output_file_html_part (Original base: '$filename_base_orig', Sanitized: '$sanitized_filename_base')"
|
||||
|
||||
mkdir -p "$(dirname "$output_file_abs")"
|
||||
|
||||
local template_to_use=""
|
||||
if [[ -n "$custom_layout_fm" ]]; then
|
||||
if [[ "$custom_layout_fm" != *.html && "$custom_layout_fm" != *.xml ]]; then custom_layout_fm+=".html"; fi
|
||||
@ -674,7 +800,7 @@ _process_markdown_files() {
|
||||
fi
|
||||
|
||||
if [[ -z "$template_to_use" ]]; then
|
||||
if [[ "$relative_path" == posts/* && -f "$default_post_template" ]]; then
|
||||
if [[ "$relative_path_from_content_root" == posts/* && -f "$default_post_template" ]]; then
|
||||
template_to_use="$default_post_template"
|
||||
elif [[ -f "$default_page_template" ]]; then
|
||||
template_to_use="$default_page_template"
|
||||
@ -719,12 +845,24 @@ _process_markdown_files() {
|
||||
_log DEBUG "No CSS specified for post $source_file."
|
||||
fi
|
||||
|
||||
_log INFO "Generating $output_file_abs from $source_file using template $template_to_use"
|
||||
if ! "${pandoc_cmd[@]}"; then
|
||||
_log ERROR "Pandoc failed for $source_file. Command was: ${pandoc_cmd[*]}"
|
||||
else
|
||||
_log DEBUG "Successfully generated $output_file_abs"
|
||||
_log DEBUG "Executing Pandoc for $source_file. Command constructed with individual arguments (see array definition)."
|
||||
"${pandoc_cmd[@]}"
|
||||
local pandoc_exit_code=$?
|
||||
|
||||
if [[ $pandoc_exit_code -eq 0 ]]; then
|
||||
_log DEBUG "Successfully processed $source_file to $output_file_abs"
|
||||
# Add to sitemap URLs
|
||||
if [[ "${QSG_CONFIG[build_options_generate_sitemap]}" == "true" ]]; then
|
||||
SITEMAP_URLS+=("$output_file_html_part")
|
||||
_log DEBUG "Added to sitemap URLs: $output_file_html_part"
|
||||
fi
|
||||
else
|
||||
_log ERROR "Pandoc failed for $source_file (exit code: $pandoc_exit_code). Output: $output_file_abs"
|
||||
# Optionally, decide if one failure should stop all, or just skip this file
|
||||
# For now, we continue processing other files
|
||||
continue
|
||||
fi
|
||||
|
||||
done < <(find "$content_dir" -type f -name '*.md' -print0 | xargs -0 -r realpath)
|
||||
|
||||
_log INFO "Finished processing Markdown files."
|
||||
@ -1094,6 +1232,9 @@ main() {
|
||||
_generate_rss_feed
|
||||
if [[ $? -ne 0 ]]; then _log ERROR "RSS feed generation failed."; exit 1; fi
|
||||
|
||||
_generate_sitemap
|
||||
if [[ $? -ne 0 ]]; then _log WARNING "Sitemap generation encountered issues."; fi # Non-fatal, just a warning
|
||||
|
||||
_log INFO "Final state of output directory (${QSG_CONFIG[paths_output_dir]}):
|
||||
$(ls -R "${QSG_CONFIG[paths_output_dir]}" 2>&1)"
|
||||
_log SUCCESS "Site generation complete! Output: ${QSG_CONFIG[paths_output_dir]}"
|
||||
|
Loading…
x
Reference in New Issue
Block a user