Rewrite most of the sitemap generation code to be much cleaner: modularize file title/description extraction, and take advantage of the new fproc_cache to cache the sitemap.

This commit is contained in:
uriel 2009-02-06 20:43:11 +01:00
parent d5b18a461a
commit 0507bfa482

View File

@ -5,65 +5,61 @@ tmpfile=/tmp/werc_sitemap_$pid.txt
echo '' > $tmpfile echo '' > $tmpfile
saveddf=$dirfilter saveddf=$dirfilter
fn getMdDesc { fn get_md_title {
sed 's/^(.......................................................................................................[^ ]*).*$/\1/g; 1q' < $1 sed 's/^(................................................................[^ ]*).*$/\1/g; 1q' < $1
}
# get_html_title FILE
# Print a one-line title/description for the HTML file FILE on stdout.
# The text is taken from the first non-empty line of the <body> section with
# all tags stripped; if that yields nothing, the first line of the file
# (tags stripped) is used instead.
# The result is also left in $desc, as before, for any caller that invokes
# the function directly rather than through `{...}.
fn get_html_title {
    # H1 is not reliable because htmlroff doesn't use it :(
    #desc=`{cat $1 | sed 32q | grep '<[Hh]1>' | sed 's/<[Hh]1>(.*)(<\/[Hh]1>|$)/\1/;s/<[^>]*>//g;1q'}
    # Pick the first line of body instead
    desc=`{sed -n '/<[Bb][Oo][Dd][Yy]/,/./s/(<[^>]*>|$)//gp' < $1}
    # Fallback: no <body> text found, so use the first line of the file.
    if(~ $#desc 0)
        desc=`{sed 's/<[^>]*>//g; 1q' < $1}
    # Callers capture this function with `{...}, which only sees stdout and
    # discards variable assignments, so the title has to be printed here
    # (get_md_title already prints its result the same way).
    echo $desc
}
fn get_file_title {
if(~ $1 */) {
if(test -f $1/index.md)
get_md_title $1/index.md
if not if(test -f $1/index.html)
get_html_title $1/index.html
}
if not if(~ $1 *.md)
get_md_title $1
if not if(~ $1 *.html)
get_html_title $1
if not
echo ''
} }
# XXX Instead of recursion should use du(1) or similar.
fn listDir { fn listDir {
d=$1 d=$1
if(~ $#d 0)
d=''
dirfilter=$saveddf dirfilter=$saveddf
blogDirs=()
if(test -f $d/_werc/config) if(test -f $d/_werc/config)
. ./$d/_werc/config . ./$d/_werc/config
echo '<ul class="sitemap-list">' echo '<ul class="sitemap-list">'
# Don't hide blog dirs for now for(i in `{ls -dF $d^*/ $d^*.md $d^*.html $d^*.txt >[2]/dev/null | sed $dirfilter}) {
#if(! ~ $#blogDirs 0 || ~ $1 */blog */Blog) desc=`{get_file_title $i}
# echo '' u=`{echo $i|sed 's!'$sitedir'!!; '$dirclean's!/index$!/!; '}
#if not
if(! ~ $#redirectPermanent 1)
{
for(i in `{ls -dF $d^*/ $d^*.md $d^*.html $d^*.txt >[2]/dev/null | sed $dirfilter$dirclean}) {
desc=''
if(test -f $i.md)
desc=`{getMdDesc $i.md}
if not if(~ $i */ && test -f $i/index.md)
desc=`{getMdDesc $i/index.md}
if not if(test -f $i.html) {
# H1 is not reliable because htmlroff doesn't use it :(
#desc = `{cat $i.html |sed 32q | grep '<[Hh]1>' |sed 's/<[Hh]1>(.*)(<\/[Hh]1>|$)/\1/;s/<[^>]*>//g;1q'}
# Pick the first line of body instead
desc=`{sed -n '/<[Bb][Oo][Dd][Yy]/,/./s/(<[^>]*>|$)//gp' < $i.html}
if(~ $#desc 0)
desc=`{sed 's/<[^>]*>//g; 1q' < $i.html}
#desc=`{/bin/sed -e '0,/<[Bb][Oo][Dd][Yy]/d;s/<[^>]*>//g;/^$/d' < $i.html >[2]/dev/null | sed 1q}
}
if(! ~ $#desc 0 && ! ~ $desc '') if(! ~ $#desc 0 && ! ~ $desc '')
desc=' - '$"desc desc=' - '$"desc
tit=`{echo /$i|sed 's/_/ /g; s,.*/([^/]+)/?$,\1,'} n=`{echo /$u|sed 's/_/ /g; s,.*/([^/]+)/?$,\1,'}
echo '<li><a href="/'$i'">'^$"tit^'</a>' $"desc '</li>' echo '<li><a href="'$base_url$u'">'^$"n^'</a>' $"desc '</li>'
echo -n $base_url^$i >> $tmpfile echo $base_url^$u >> $tmpfile
if(test -d $i) { if(test -d $i)
echo / >> $tmpfile
@{ listDir $i } @{ listDir $i }
}
if not
echo >> $tmpfile
}
} }
echo '</ul>' echo '</ul>'
} }
cd $sitedir fproc_cache.rc listDir $sitedir/
listDir '' #listDir $sitedir/
cp $tmpfile ./sitemap.txt if(test -s $tmpfile)
rm $tmpfile mv $tmpfile $sitedir/sitemap.txt &
%} %}