Rewrite most of the sitemap generation code to be much cleaner: modularize file title/description extraction, and take advantage of the new fproc_cache to cache the sitemap.
This commit is contained in:
parent
d5b18a461a
commit
0507bfa482
@ -5,65 +5,61 @@ tmpfile=/tmp/werc_sitemap_$pid.txt
|
|||||||
echo '' > $tmpfile
|
echo '' > $tmpfile
|
||||||
saveddf=$dirfilter
|
saveddf=$dirfilter
|
||||||
|
|
||||||
fn getMdDesc {
|
fn get_md_title {
|
||||||
sed 's/^(.......................................................................................................[^ ]*).*$/\1/g; 1q' < $1
|
sed 's/^(................................................................[^ ]*).*$/\1/g; 1q' < $1
|
||||||
|
}
|
||||||
|
|
||||||
|
# get_html_title file
# Print a one-line title for the HTML file named by $1 on standard output.
# Callers capture the result with `{...} command substitution, so the title
# has to be written to stdout; an assignment to $desc alone never reaches
# the caller's copy of the variable.
fn get_html_title {
	# H1 is not reliable because htmlroff doesn't use it :(
	#desc=`{cat $1 | sed 32q | grep '<[Hh]1>' | sed 's/<[Hh]1>(.*)(<\/[Hh]1>|$)/\1/;s/<[^>]*>//g;1q'}
	# Pick the first line of body instead: take the lines from the <body> tag
	# through the next non-empty line and strip any markup from them.
	desc=`{sed -n '/<[Bb][Oo][Dd][Yy]/,/./s/(<[^>]*>|$)//gp' < $1}
	# Nothing usable after <body> (or no <body> at all): fall back to the
	# first line of the file with tags removed.
	if(~ $#desc 0)
		desc=`{sed 's/<[^>]*>//g; 1q' < $1}
	# Emit the title; an empty $desc prints an empty line, which the caller's
	# command substitution turns into an empty list.
	echo $desc
}
|
||||||
|
|
||||||
|
fn get_file_title {
|
||||||
|
|
||||||
|
if(~ $1 */) {
|
||||||
|
if(test -f $1/index.md)
|
||||||
|
get_md_title $1/index.md
|
||||||
|
if not if(test -f $1/index.html)
|
||||||
|
get_html_title $1/index.html
|
||||||
|
}
|
||||||
|
if not if(~ $1 *.md)
|
||||||
|
get_md_title $1
|
||||||
|
if not if(~ $1 *.html)
|
||||||
|
get_html_title $1
|
||||||
|
if not
|
||||||
|
echo ''
|
||||||
}
|
}
|
||||||
|
|
||||||
# XXX Instead of recursion should use du(1) or similar.
|
|
||||||
fn listDir {
|
fn listDir {
|
||||||
d=$1
|
d=$1
|
||||||
if(~ $#d 0)
|
|
||||||
d=''
|
|
||||||
dirfilter=$saveddf
|
dirfilter=$saveddf
|
||||||
blogDirs=()
|
|
||||||
if(test -f $d/_werc/config)
|
if(test -f $d/_werc/config)
|
||||||
. ./$d/_werc/config
|
. ./$d/_werc/config
|
||||||
|
|
||||||
echo '<ul class="sitemap-list">'
|
echo '<ul class="sitemap-list">'
|
||||||
|
|
||||||
# Don't hide blog dirs for now
|
for(i in `{ls -dF $d^*/ $d^*.md $d^*.html $d^*.txt >[2]/dev/null | sed $dirfilter}) {
|
||||||
#if(! ~ $#blogDirs 0 || ~ $1 */blog */Blog)
|
desc=`{get_file_title $i}
|
||||||
# echo ''
|
u=`{echo $i|sed 's!'$sitedir'!!; '$dirclean's!/index$!/!; '}
|
||||||
#if not
|
|
||||||
if(! ~ $#redirectPermanent 1)
|
|
||||||
{
|
|
||||||
|
|
||||||
for(i in `{ls -dF $d^*/ $d^*.md $d^*.html $d^*.txt >[2]/dev/null | sed $dirfilter$dirclean}) {
|
|
||||||
desc=''
|
|
||||||
if(test -f $i.md)
|
|
||||||
desc=`{getMdDesc $i.md}
|
|
||||||
if not if(~ $i */ && test -f $i/index.md)
|
|
||||||
desc=`{getMdDesc $i/index.md}
|
|
||||||
if not if(test -f $i.html) {
|
|
||||||
# H1 is not reliable because htmlroff doesn't use it :(
|
|
||||||
#desc = `{cat $i.html |sed 32q | grep '<[Hh]1>' |sed 's/<[Hh]1>(.*)(<\/[Hh]1>|$)/\1/;s/<[^>]*>//g;1q'}
|
|
||||||
# Pick the first line of body instead
|
|
||||||
desc=`{sed -n '/<[Bb][Oo][Dd][Yy]/,/./s/(<[^>]*>|$)//gp' < $i.html}
|
|
||||||
if(~ $#desc 0)
|
|
||||||
desc=`{sed 's/<[^>]*>//g; 1q' < $i.html}
|
|
||||||
#desc=`{/bin/sed -e '0,/<[Bb][Oo][Dd][Yy]/d;s/<[^>]*>//g;/^$/d' < $i.html >[2]/dev/null | sed 1q}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(! ~ $#desc 0 && ! ~ $desc '')
|
if(! ~ $#desc 0 && ! ~ $desc '')
|
||||||
desc=' - '$"desc
|
desc=' - '$"desc
|
||||||
tit=`{echo /$i|sed 's/_/ /g; s,.*/([^/]+)/?$,\1,'}
|
n=`{echo /$u|sed 's/_/ /g; s,.*/([^/]+)/?$,\1,'}
|
||||||
echo '<li><a href="/'$i'">'^$"tit^'</a>' $"desc '</li>'
|
echo '<li><a href="'$base_url$u'">'^$"n^'</a>' $"desc '</li>'
|
||||||
echo -n $base_url^$i >> $tmpfile
|
echo $base_url^$u >> $tmpfile
|
||||||
if(test -d $i) {
|
if(test -d $i)
|
||||||
echo / >> $tmpfile
|
|
||||||
@{ listDir $i }
|
@{ listDir $i }
|
||||||
}
|
|
||||||
if not
|
|
||||||
echo >> $tmpfile
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
echo '</ul>'
|
echo '</ul>'
|
||||||
}
|
}
|
||||||
|
|
||||||
cd $sitedir
|
fproc_cache.rc listDir $sitedir/
|
||||||
listDir ''
|
#listDir $sitedir/
|
||||||
|
|
||||||
cp $tmpfile ./sitemap.txt
|
if(test -s $tmpfile)
|
||||||
rm $tmpfile
|
mv $tmpfile $sitedir/sitemap.txt &
|
||||||
|
|
||||||
%}
|
%}
|
||||||
|
Loading…
Reference in New Issue
Block a user