# webch: Web Characters, version 0.1  (C) 23-6-2003 Daniel Clemente Laboreo
# webch: Change strange characters of web pages to ISO-Latin1
# www.danielclemente.com/webch   danielclemente@ozu.es
# This program is licensed under the GPL; you may modify it as you see fit.

# You need awk and basename (from coreutils) to run the script.

# TODO:
# * Solve the DO-NOT-CHANGE-TAGS thing, maybe by using another language -C- or using tricky tricks with diff, awk and patch (I have some ideas)
# * Accept input from pipes
# * Learn about the euro symbol
# * Add a -u option for undoing changes


# ShowBanner: print the start-up banner on standard output.
# The banner identifies the program, shows the usage line (with $0 as the
# command name) and warns that this version also rewrites characters found
# inside HTML comments and attribute values.
# Every line is passed to printf as a quoted argument, so characters such as
# "*", "?" and "!" are printed literally and never expanded as glob patterns.
ShowBanner () {
	printf '%s\n' \
		"************************************************************************" \
		"* starting Web Characters 0.1        23th June 2003 by Daniel Clemente *" \
		"************ GPL'ed ****************** www.danielclemente.com/webch ****" \
		"                                              danielclemente@ozu.es" \
		"" \
		"Use: $0 files...  (example: $0 *.htm). Output goes to 'converted' dir" \
		"" \
		'Remember this version will also change things like:' \
		'<!-- Año -->  to  <!-- A&#241;o -->' \
		'<IMG SRC=pingüino.png>  to  <IMG SRC=ping&#252;ino.png>' \
		'this is unwanted! But what did you hope of a 0.1 version?' \
		"Anyway, wise webmasters don't use strange characters in paths." \
		""
}

ShowBanner

# The Convert function; it has to be declared here
# Convert FILE
# Copy FILE to converted/<basename of FILE>, replacing each character listed
# in the awk table below with its numeric HTML entity (&#NNN;).
# The "converted" directory must already exist in the current directory.
# Prints "ok" on success; prints an "error! ..." message and returns 1 when
# the file cannot be converted. Messages go to standard output because they
# complete the progress line printed by the caller.
Convert () {
	local out

	# basename strips the path and returns the file name.
	# It's in package coreutils. Install it, it's useful.
	# The "--" stops a name starting with "-" from being read as an option.
	if ! out=converted/$(basename -- "$1")
	then
		echo "error! Can't work out the output file name."
		return 1
	fi

	# Refuse to convert a file onto itself: the ">" redirection below would
	# truncate it before awk had read a single line.
	if [ "$1" -ef "$out" ]
	then
		echo "error! File is already inside 'converted'. Skipping it."
		return 1
	fi

	# The input is redirected into awk instead of being passed as an operand:
	# awk treats an operand of the form name=value as a variable assignment
	# and "-" as standard input, so such file names would not be read.
	# In every replacement string "\\&" yields a literal "&"; a bare "&"
	# would be replaced by the matched text.
	# (Do not use apostrophes in the awk comments: the program is enclosed
	# in single quotes.)
	if awk '{


	# Vowels with acute accent
	gsub (/á/, "\\&#225;");
	gsub (/é/, "\\&#233;");
	gsub (/í/, "\\&#237;");
	gsub (/ó/, "\\&#243;");
	gsub (/ú/, "\\&#250;");

	gsub (/Á/, "\\&#193;");
	gsub (/É/, "\\&#201;");
	gsub (/Í/, "\\&#205;");
	gsub (/Ó/, "\\&#211;");
	gsub (/Ú/, "\\&#218;");

	# Vowels with grave accent
	gsub (/à/, "\\&#224;");
	gsub (/è/, "\\&#232;");
	gsub (/ì/, "\\&#236;");
	gsub (/ò/, "\\&#242;");
	gsub (/ù/, "\\&#249;");

	gsub (/À/, "\\&#192;");
	gsub (/È/, "\\&#200;");
	gsub (/Ì/, "\\&#204;");
	gsub (/Ò/, "\\&#210;");
	gsub (/Ù/, "\\&#217;");

	# Vowels with umlaut
	gsub (/ä/, "\\&#228;");
	gsub (/ë/, "\\&#235;");
	gsub (/ï/, "\\&#239;");
	gsub (/ö/, "\\&#246;");
	gsub (/ü/, "\\&#252;");

	gsub (/Ä/, "\\&#196;");
	gsub (/Ë/, "\\&#203;");
	gsub (/Ï/, "\\&#207;");
	gsub (/Ö/, "\\&#214;");
	gsub (/Ü/, "\\&#220;");

	# Vowels with circumflex accent
	gsub (/â/, "\\&#226;");
	gsub (/ê/, "\\&#234;");
	gsub (/î/, "\\&#238;");
	gsub (/ô/, "\\&#244;");
	gsub (/û/, "\\&#251;");

	gsub (/Â/, "\\&#194;");
	gsub (/Ê/, "\\&#202;");
	gsub (/Î/, "\\&#206;");
	gsub (/Ô/, "\\&#212;");
	gsub (/Û/, "\\&#219;");

	# C cedilla, used in French, Catalan, ... Sounds similar to "s"
	gsub (/ç/, "\\&#231;");
	gsub (/Ç/, "\\&#199;");

	# "Eñe", used in Spanish (pronounced as in "caNYon")
	gsub (/ñ/, "\\&#241;");
	gsub (/Ñ/, "\\&#209;");

	# Inverted marks that open sentences ending with ! or ?
	gsub (/¡/, "\\&#161;");
	gsub (/¿/, "\\&#191;");


	# The following are not so common

	# Z and s with caron, and the circumflex sign on its own.
	# NOTE(review): this group used to be described as "the accent marks
	# themselves, without vowels"; the first two literals may be acute and
	# diaeresis signs altered by a charset conversion - confirm.
	gsub (/Ž/, "\\&#381;");
	# No, &grave; ( `````` ) is not supported by browsers... Why?
	gsub (/š/, "\\&#353;");
	gsub (/\^/, "\\&#94;");

	# Middle dot, used in Catalan with the letter l: "l·l". Similar to "l"
	gsub (/·/, "\\&#183;");

	# As "1st" means "first", in Spanish "1º"="primero" and "1ª"="primera"
	gsub (/º/, "\\&#186;");
	gsub (/ª/, "\\&#170;");

	# a-e ligature
	gsub (/æ/, "\\&#230;");
	gsub (/Æ/, "\\&#198;");

	# Very nice for quotations
	gsub (/«/, "\\&#171;");
	gsub (/»/, "\\&#187;");


	# Currency symbols
	gsub (/€/, "\\&#8364;"); # This does not work very well, tell me if you know something about it.
	gsub (/¢/, "\\&#162;");
	gsub (/£/, "\\&#163;");
	gsub (/¥/, "\\&#165;");

	# Section sign, used to refer to sections (text paragraphs, for example)
	gsub (/§/, "\\&#167;");

	# Logical NOT (hypotheses, etc)
	gsub (/¬/, "\\&#172;");

	# Macron: indicates a longer sound for a vowel
	gsub (/¯/, "\\&#175;");


	# Add your own changes here. E-mail me the missing ones!


	print;

	}' <"$1" >"$out"
	then
		echo ok
	else
		# Either a redirection failed (unreadable input, missing or
		# unwritable output directory) or awk itself reported an error.
		echo "error! Conversion failed."
		return 1
	fi

}


# HERE BEGINS THE SCRIPT

# Create the output directory before processing anything. With -p an already
# existing directory is not an error, so a non-zero status here is a real
# failure (for example a regular file named "converted"); mkdir reports the
# reason on standard error and the script stops instead of failing per file.
mkdir -p converted || exit 1

# Becomes 1 as soon as one argument is skipped or fails to convert.
status=0

for file in "$@"
do
	# Progress line; the result word printed below completes it.
	# printf with a quoted argument prints the name exactly as given
	# (no word splitting, no globbing, no dependence on "echo -n").
	printf '***> PROCESSING: %s... ' "$file"


	# Check if it's a good file

	if ! test -r "$file"
	then
		echo "error! Can't read file. Skipping it."
		status=1

	elif test -d "$file"
	then
		echo "directory! Recursive processing not yet implemented; use $0 \`find\`"
		status=1

	else
		# Start the processing
		Convert "$file" || status=1
	fi

done

# Remove the output directory again if nothing was written into it.
# rmdir refuses to delete a non-empty directory; that refusal is expected,
# so its message and status are deliberately discarded.
rmdir converted >/dev/null 2>&1

# Exit 0 when every argument was converted, 1 otherwise.
exit "$status"
# THE SCRIPT ENDS HERE. I hope you liked it.



