#!/bin/bash
# This is to correct the messes made by (alleged) operating systems
# which cannot distinguish UPPER and lower case.
# I received a CDROM, made on such a system, which had filenames
# of mixed case, without any apparent logic. Some filenames were all
# upper case, some all lower case, and some mixed. These files were
# image files and HTML files. Within the .htm files and .HTM files,
# IMG SRC and img src tags pointed to randomly mixed case filenames.
# However, the random mix did not match the random mix of the actual
# files. Example:
# an <IMG SRC="ct051.jpg"> tag, when the real filename was
# CT051.JPG
# Even children in their first year of school can recognize the
# difference between "A" and "a". Why can't that expensive joke of
# an operating system?
# Anyway, this is free; use at your own risk. It worked for me, but
# might eat your dog and shave your cat. Don't blame me if it bombs.
# Terry Vessels 2003
#
# "It's finished when you can't take any more out."
# Thanks to: "the_corruptor3000", "f3ew", "javatype", "vadim_35", and
# Hal Burgiss for his Font De-uglification HOWTO (check the upper to
# lower conversion sh script).
#
# preparation: the files from the CDROM were copied to a temporary
# directory, permissions were set (755 for directories, 644 for files)
# NOTE: this script is not recursive; it deals only with the current directory
#
# first, change all spaces in filenames to underscores
# next, deal with the mismatch of mixed case by making
# all filenames lower case
#######################################
# Normalize the names of the regular files in the current directory:
# every space becomes an underscore and every A-Z becomes a-z.
# Not recursive; directories and other non-regular entries are skipped.
# Globals:   none
# Arguments: none
# Outputs:   mv -v reports each rename on stdout; mv -i asks before
#            overwriting an existing file
# Returns:   status of the last command run in the loop
#######################################
normalize_filenames() {
  local name target
  for name in *; do
    # Also skips the literal "*" left behind when the directory is empty.
    [[ -f ${name} ]] || continue

    target=${name// /_}
    # printf instead of echo so names such as "-n" are not taken as options.
    target=$(printf '%s' "${target}" | tr 'A-Z' 'a-z')

    # Already normalized: calling mv with identical source and target
    # would only produce a "same file" error.
    [[ ${name} != "${target}" ]] || continue

    # One rename straight to the final name, so a refused or failed
    # intermediate step cannot leave a half-renamed file behind.
    # "--" protects names that start with a dash.
    mv -iv -- "${name}" "${target}"
  done
}

normalize_filenames
# last, change the mismatched <IMG SRC> references
# to all lowercase (all characters between < and > will be lowered)
#######################################
# Rewrite every *.htm file in the current directory so that the text of
# each tag (everything from "<" to ">") is lower case.
# How: sed puts every tag on a line of its own, tr removes carriage
# returns and squeezes the resulting blank lines, and awk lower-cases
# each line whose first field starts with "<". The file is rewritten in
# place, so its line layout changes (one tag per line).
# Limitation: a tag that spans several input lines is only lowered on
# its first line, because the continuation lines do not start with "<".
# Globals:   none
# Arguments: none
# Outputs:   diagnostics on stderr for files that could not be converted
# Returns:   1 if the scratch file cannot be created, otherwise 0
#######################################
lowercase_html_tags() {
  local page scratch
  # A private scratch file avoids clobbering an unrelated "<name>.fixed".
  scratch=$(mktemp) || {
    printf 'cannot create a temporary file\n' >&2
    return 1
  }

  for page in *.htm; do
    # Skips the literal "*.htm" left behind when nothing matches.
    [[ -f ${page} ]] || continue

    # The replacement text of the first two sed commands contains an
    # escaped newline, so the continuation lines must start in column 0.
    sed -e 's/</\
</g' -e 's/>/>\
/g' -e 's/IMG SRC/img src/g' -- "${page}" \
      | tr -d '\015' \
      | tr -s '\012' \
      | awk '{
          if (substr($1, 1, 1) == "<") {
            print tolower($0)
          } else {
            print $0
          }
        }' > "${scratch}"

    # Only replace the page when every stage of the pipeline succeeded.
    if [[ "${PIPESTATUS[*]}" != "0 0 0 0" ]]; then
      printf 'skipping %s: conversion failed\n' "${page}" >&2
      continue
    fi

    # Redirecting into the existing file keeps its permissions.
    cat -- "${scratch}" > "${page}"
  done

  rm -f -- "${scratch}"
}

lowercase_html_tags