From 0507bfa482e8b49c9c96954a5777150918758358 Mon Sep 17 00:00:00 2001
From: uriel
Date: Fri, 6 Feb 2009 20:43:11 +0100
Subject: [PATCH] Rewrite most sitemap generation code, much cleaner, modularize file title/desc extraction, take advantage of new fproc_cache to cache sitemap.

---
 lib/sitemap.tpl | 80 +++++++++++++++++++++++--------------------------
 1 file changed, 38 insertions(+), 42 deletions(-)

diff --git a/lib/sitemap.tpl b/lib/sitemap.tpl
index 3ade1e9..3818233 100644
--- a/lib/sitemap.tpl
+++ b/lib/sitemap.tpl
@@ -5,65 +5,61 @@ tmpfile=/tmp/werc_sitemap_$pid.txt
 echo '' > $tmpfile
 saveddf=$dirfilter
-fn getMdDesc {
-    sed 's/^(.......................................................................................................[^ ]*).*$/\1/g; 1q' < $1
+fn get_md_title {
+    sed 's/^(................................................................[^ ]*).*$/\1/g; 1q' < $1
+}
+
+fn get_html_title {
+    # H1 is not reliable because htmlroff doesn't use it :(
+    #desc=`{cat $1 | sed 32q | grep '<[Hh]1>' | sed 's/<[Hh]1>(.*)(<\/[Hh]1>|$)/\1/;s/<[^>]*>//g;1q'}
+    # Pick the first line of body instead
+    desc=`{sed -n '/<[Bb][Oo][Dd][Yy]/,/./s/(<[^>]*>|$)//gp' < $1}
+    if(~ $#desc 0)
+        desc=`{sed 's/<[^>]*>//g; 1q' < $1}
+}
+
+fn get_file_title {
+
+    if(~ $1 */) {
+        if(test -f $1/index.md)
+            get_md_title $1/index.md
+        if not if(test -f $1/index.html)
+            get_html_title $1/index.html
+    }
+    if not if(~ $1 *.md)
+        get_md_title $1
+    if not if(~ $1 *.html)
+        get_html_title $1
+    if not
+        echo ''
 }
-# XXX Instead of recursion should use du(1) or similar.
 fn listDir {
     d=$1
-    if(~ $#d 0)
-        d=''
     dirfilter=$saveddf
-    blogDirs=()
     if(test -f $d/_werc/config)
         . ./$d/_werc/config
     echo ''
 }
-cd $sitedir
-listDir ''
+fproc_cache.rc listDir $sitedir/
+#listDir $sitedir/
-cp $tmpfile ./sitemap.txt
-rm $tmpfile
+if(test -s $tmpfile)
+    mv $tmpfile $sitedir/sitemap.txt &
 %}