# Taken from http://www.shelldorado.com/scripts/cmds/urlencode
##########################################################################
# Title      : urlencode - encode URL data
# Author     : Heiner Steven (heiner.steven@odn.de)
# Date       : 2000-03-15
# Requires   : awk
# Categories : File Conversion, WWW, CGI
# SCCS-Id.   : @(#) urlencode 1.4 06/10/29
##########################################################################
# Description
#   Encode data according to
#       RFC 1738: "Uniform Resource Locators (URL)" and
#       RFC 1866: "Hypertext Markup Language - 2.0" (HTML)
#
#   This encoding is used e.g. for the MIME type
#   "application/x-www-form-urlencoded"
#
# Notes
#    o  The default behaviour is not to encode the line endings. This
#       may not be what was intended, because the result will be
#       multiple lines of output (which cannot be used in a URL or an
#       HTTP "POST" request). If the desired output should be one
#       line, use the "-l" option.
#
#    o  The "-l" option assumes that the end-of-line is denoted by
#       the character LF (ASCII 10). This is not true for Windows or
#       Mac systems, where the end of a line is denoted by the two
#       characters CR LF (ASCII 13 10).
#       We use this for symmetry; data processed in the following way:
#           cat | urlencode -l | urldecode -l
#       should (and will) result in the original data.
#
#    o  Large lines (or binary files) will break many AWK
#       implementations. If you get the message
#           awk: record `...' too long
#            record number xxx
#       consider using GNU AWK (gawk).
#
#    o  urlencode will always terminate its output with an EOL
#       character.
#
# Thanks to Stefan Brozinski for pointing out a bug related to non-standard
# locales.
#
# See also
#   urldecode
##########################################################################

# Script-wide settings.
PN=`basename "$0"`		# Program name, used as prefix in messages
VER='1.4'			# Version string shown by Usage

# awk interpreter to run.  The default "awk" is only assigned when AWK is
# not already set, so another implementation can be selected from the
# environment (e.g. AWK=gawk).
: ${AWK=awk}

# Usage - print a short usage summary to standard error and terminate
# the script with exit status 1.
# Reads the globals PN (program name) and VER (version).
Usage () {
    echo >&2 "$PN - encode URL data, $VER
usage: $PN [-l] [file ...]
    -l: encode line endings (result will be one line of output)

The default is to encode each input line on its own."
    exit 1
}

# Msg - write every argument as its own line to standard error, each
# prefixed with the program name (global PN).  Without arguments nothing
# is printed.
Msg () {
    for MsgLine
    do
	# printf emits the text verbatim; "echo" may expand backslash
	# sequences contained in the message in some shells.
	printf '%s\n' "$PN: $MsgLine" >&2
    done
}

# Fatal - report the given message lines through Msg, then terminate the
# script with exit status 1.
Fatal () {
    Msg "$@"
    exit 1
}

# Command line processing.
#   -l  encode line endings as well (sets EncodeEOL=yes)
#   -h  print the usage message
# Any unknown option also ends in Usage.  After the loop the positional
# parameters hold only the file names.
#
# The shell builtin "getopts" is used rather than the external "getopt":
# re-splitting the output of getopt with "set --" breaks file names that
# contain blanks or wildcard characters, and the exit status of
# "set -- `getopt ...`" is that of "set", so a getopt failure could not
# be detected that way.  The leading ":" in the option string keeps
# getopts silent, because Usage already reports the problem.
# Options must precede the file names.
EncodeEOL=no
while getopts :hl Option
do
    case "$Option" in
	l)  EncodeEOL=yes;;
	*)  Usage;;			# -h or an invalid option
    esac
done
shift $((OPTIND - 1))			# drop the options that were parsed

# Run awk in the "C" locale so that the input is handled one byte at a
# time and every character is covered by the ord[] table built below.
# LC_ALL is set in addition to LANG because an inherited LC_ALL (or
# LC_CTYPE) would otherwise take precedence over LANG.  Assignment and
# export are separate statements so that the values reliably persist.
LANG=C
LC_ALL=C
export LANG LC_ALL

# Encode the named files (or standard input) and write the result to
# standard output.  The shell variable EncodeEOL is spliced into the awk
# program text in the BEGIN block.
$AWK '
    BEGIN {
	# We assume an awk implementation that is just plain dumb.
	# We will convert a character to its ASCII value with the
	# table ord[], and produce two-digit hexadecimal output
	# without the printf("%02X") feature.

	EOL = "%0A"		# "end of line" string (encoded)
	split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
	hextab [0] = 0
	for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0
	if ("'"$EncodeEOL"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0
    }
    {
	encoded = ""
	len = length ($0)	# loop invariant, computed once per record
	for ( i=1; i<=len; ++i ) {
	    c = substr ($0, i, 1)
	    if ( c ~ /[a-zA-Z0-9.-]/ ) {
		encoded = encoded c		# safe character
	    } else if ( c == " " ) {
		encoded = encoded "+"		# special handling
	    } else {
		# unsafe character, encode it as a two-digit hex-number
		lo = ord [c] % 16
		hi = int (ord [c] / 16)
		encoded = encoded "%" hextab [hi] hextab [lo]
	    }
	}
	if ( EncodeEOL ) {
	    # One long output line: the record separator is emitted in
	    # its encoded form instead of a raw newline.
	    printf ("%s", encoded EOL)
	} else {
	    print encoded
	}
    }
    END {
	# Disabled on purpose: no raw newline is appended after the
	# last encoded EOL.
	#if ( EncodeEOL ) print ""
    }
' "$@"