#!/bin/bash
# This is to correct the messes made by (alleged) operating systems
# which cannot distinguish UPPER and lower case.
# I received a CDROM, made on such a system, which had filenames
# of mixed case, without any apparent logic. Some filenames were all
# upper case, some all lower case, and some mixed. These files were
# image files and HTML files. Within the .htm files and .HTM files,
# IMG SRC and img src tags pointed to randomly mixed case filenames.
# However, the random mix did not match the random mix of the actual
# files. Example: a tag such as <IMG SRC="ct051.jpg"> when the real
# filename was CT051.JPG
#
# Even children in their first year of school can recognize the
# difference between "A" and "a". Why can't that expensive joke of
# an operating system?
#
# Anyway, this is free; use at your own risk. It worked for me, but
# might eat your dog and shave your cat. Don't blame me if it bombs.
# Terry Vessels 2003
#
# "It's finished when you can't take any more out."
#
# Thanks to: "the_corruptor3000", "f3ew", "javatype", "vadim_35", and
# Hal Burgiss for his Font De-uglification HOWTO (check the upper to
# lower conversion sh script).
#
# Preparation: the files from the CDROM were copied to a temporary
# directory, permissions were set (755 for directories, 644 for files).
# NOTE: this script is not recursive; it deals only with the current
# directory.

# Let a failing sed/tr stage fail the whole HTML rewrite pipeline, so a
# page is never replaced by the output of a broken run.
set -o pipefail

#######################################
# First, change all spaces in filenames to underscores; next, deal with
# the mismatch of mixed case by making all filenames lower case.
# Only regular files in the current directory are renamed; dotfiles are
# not matched by the glob and directories are skipped.
# Arguments: none
# Outputs:   mv -v reports every rename on stdout; mv -i asks before
#            overwriting an existing file.
# Returns:   exit status of the last command run.
#######################################
normalize_filenames() {
  local name target
  for name in *; do
    [[ -f "${name}" ]] || continue
    target=${name// /_}
    target=$(printf '%s' "${target}" | tr '[:upper:]' '[:lower:]')
    # Already clean: moving a file onto itself only produces an error.
    [[ "${name}" != "${target}" ]] || continue
    # "--" protects names that begin with a dash.
    mv -iv -- "${name}" "${target}"
  done
}

#######################################
# Last, change the mismatched references to all lowercase (all
# characters between < and > will be lowered) in every *.htm file of the
# current directory. Each page is rewritten in place with one tag per
# line, carriage returns removed and blank lines squeezed out.
# A tag that already spans several input lines is only lowered on the
# line that starts with "<".
# Arguments: none
# Outputs:   diagnostics on stderr when a page cannot be rewritten.
# Returns:   0 if every page was rewritten, 1 otherwise.
#######################################
lowercase_html_tags() {
  local page scratch rc=0
  for page in *.htm; do
    # An unmatched glob stays the literal "*.htm"; skip it rather than
    # creating a junk file with that name.
    [[ -f "${page}" ]] || continue

    # Scratch file outside the working directory, so an existing
    # "<name>.fixed" file is never overwritten or deleted.
    scratch=$(mktemp "${TMPDIR:-/tmp}/lowercase.XXXXXX") || {
      printf 'cannot create temporary file for %s\n' "${page}" >&2
      return 1
    }

    # The two $'...' expressions insert a newline before every "<" and
    # after every ">" (backslash-newline in the replacement works with
    # both GNU and BSD sed), so each tag ends up on a line of its own and
    # awk can lower-case exactly the lines that start with "<".
    if sed -e $'s/</\\\n</g' -e $'s/>/>\\\n/g' -e 's/IMG SRC/img src/g' \
      < "${page}" \
      | tr -d '\015' \
      | tr -s '\012' \
      | awk '{
          if (substr($1, 1, 1) == "<") {
            print tolower($0)
          } else {
            print $0
          }
        }' > "${scratch}"; then
      # Copy the contents back instead of moving the scratch file, so the
      # page keeps its original permissions.
      cat "${scratch}" > "${page}" || rc=1
    else
      printf 'failed to rewrite %s; file left unchanged\n' "${page}" >&2
      rc=1
    fi
    rm -f -- "${scratch}"
  done
  return "${rc}"
}

#######################################
# Entry point: normalize the filenames first (this also turns *.HTM into
# *.htm), then fix the references inside the HTML pages.
#######################################
main() {
  normalize_filenames
  lowercase_html_tags
}

main "$@"