_sitemap: Split blogs and pages into separate sitemap files.

This commit is contained in:
Stig-Ørjan Smelror 2024-02-18 11:12:28 +01:00
parent 152d811901
commit 550d1a75a3

73
qsgen2
View File

@ -916,20 +916,81 @@ function _sitemap() {
_msg main "${0:t}_msg_1"
local sm_file="sitemap.xml"
local b_file="sitemap-blogs.xml"
local p_file="sitemap-pages.xml"
local sitemap_file="${www_root}/${sm_file}"
#local sitemap_blog="${www_root}/sitemap-blogs.xml"
#local sitemap_page="${www_root}/sitemap-pages.xml"
local sitemap_blog="${www_root}/${b_file}"
local sitemap_page="${www_root}/${p_file}"
# Find all HTML files and store them in an array
# local -a html_files=("${(@f)$(find "${www_root}" -type f -name "*.html")}")
local -a html_files=(${www_root}/**/[a-z]*.html(.))
local -a blog_files=$(echo ${html_files[@]} | grep blog)
local -a page_files=$(echo ${html_files[@]} | grep -v blog)
local -a xml_files=(${www_root}/[a-z]*.xml(.))
local -a xml_files=$( echo ${xml_files[@]} | grep -v "sitemap.xml" )
# Working on providing 2 sitemaps, 1 for pages and 1 for blogs.
# sitemap-pages.xml
# sitemap-blogs.xml
# And in the main sitemap.xml file we link to these two
#local -a html_files=("${(@f)$(find "${www_root}" -type f -name "*.xml")}")
#local -a blog_files=("${(@f)$(find "${www_root}" -type f -name "*.html" | grep blog)}")
#local -a page_files=("${(@f)$(find "${www_root}" -type f -name "*.html" | grep -v blog)}")
# Start of the XML file for BLOGS
echo '<?xml version="1.0" encoding="UTF-8"?>' > ${b_file}
echo "<!-- Sitemap generated by ${QSGEN} ${VERSION} - https://github.com/kekePower/qsgen2 -->" >> ${b_file}
echo "<?xml-stylesheet type=\"text/xsl\" href=\"${site_url}/css/default-sitemap.xsl?sitemap=page\"?>" >> ${b_file}
echo '<urlset' >> ${b_file}
echo ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"' >> ${b_file}
echo ' xmlns:xhtml="http://www.w3.org/1999/xhtml"' >> ${b_file}
echo ' xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"' >> ${b_file}
echo '>' >> ${b_file}
# Add each URL to the sitemap
for file in "${blog_files[@]}"
do
# Remove www_root from the path and prepend site_url
local url="${site_url}${file#$www_root}"
local lastmod=$(stat -c %y "${file}" 2>/dev/null | cut -d' ' -f1,2 | sed 's/ /T/' | sed 's/\..*$//')
echo " <url>" >> ${b_file}
echo " <loc>${url}</loc>" >> ${b_file}
echo " <lastmod><![CDATA[${lastmod}+01:00]]></lastmod>" >> ${b_file}
echo " <changefreq><![CDATA[always]]></changefreq>" >> ${b_file}
echo " <priority><![CDATA[1]]></priority>" >> ${b_file}
echo " </url>" >> ${b_file}
done
# End of the XML file
echo '</urlset>' >> "${b_file}"
_msg std " - ${b_file}"
# Start of the XML file for PAGES
echo '<?xml version="1.0" encoding="UTF-8"?>' > ${p_file}
echo "<!-- Sitemap generated by ${QSGEN} ${VERSION} - https://github.com/kekePower/qsgen2 -->" >> ${p_file}
echo "<?xml-stylesheet type=\"text/xsl\" href=\"${site_url}/css/default-sitemap.xsl?sitemap=page\"?>" >> ${p_file}
echo '<urlset' >> ${p_file}
echo ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"' >> ${p_file}
echo ' xmlns:xhtml="http://www.w3.org/1999/xhtml"' >> ${p_file}
echo ' xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"' >> ${p_file}
echo '>' >> ${p_file}
# Add each URL to the sitemap
for file in "${page_files[@]}"
do
# Remove www_root from the path and prepend site_url
local url="${site_url}${file#$www_root}"
local lastmod=$(stat -c %y "${file}" 2>/dev/null | cut -d' ' -f1,2 | sed 's/ /T/' | sed 's/\..*$//')
echo " <url>" >> ${p_file}
echo " <loc>${url}</loc>" >> ${p_file}
echo " <lastmod><![CDATA[${lastmod}+01:00]]></lastmod>" >> ${p_file}
echo " <changefreq><![CDATA[always]]></changefreq>" >> ${p_file}
echo " <priority><![CDATA[1]]></priority>" >> ${p_file}
echo " </url>" >> ${p_file}
done
# End of the XML file
echo '</urlset>' >> "${p_file}"
_msg std " - ${p_file}"
# Start of the XML file
echo '<?xml version="1.0" encoding="UTF-8"?>' > ${sitemap_file}
@ -942,7 +1003,7 @@ function _sitemap() {
echo '>' >> ${sitemap_file}
# Add each URL to the sitemap
for file in "${html_files[@]}"
for file in "${xml_files[@]}"
do
# Remove www_root from the path and prepend site_url
local url="${site_url}${file#$www_root}"