Saving a file as "plain ASCII" from MS Word, Adobe Acrobat, or other applications often creates a document that is full of strange non-ASCII characters. This is because while ASCII is a 7-bit character code, such programs use one of the many 8-bit extended ASCII alphabets.
The following little C program (it reads from stdin and writes to stdout) converts everything to standard ASCII and also performs a couple of other chores: it throws out control characters, wraps long lines, and removes DOS carriage-return characters.
Okay, here is the program.
/* Convert extended-ASCII characters to reasonable substitutes,
   throw out control characters, wrap long lines, and get rid of
   DOS carriage return characters. */
#include <stdio.h>
/* Single-character ASCII substitutes for the bytes 0x7F..0xFF, indexed by
   (byte - 0x7F).  The first string covers 0x7F alone; every following
   string covers one 16-byte row, so the table must hold exactly
   1 + 8 * 16 = 129 characters (plus the terminating NUL).  A space is
   stored wherever there is no sensible one-character substitute, which
   includes most of the bytes that main() expands to multi-character
   sequences.  The layout appears to follow the Windows-1252 code page.

   Keep every row at exactly 16 characters: a short row shifts all later
   entries and makes the lookups for the highest bytes run off the end of
   the array. */
char trans[] = " "      /* 0x7F       */
"E ,f\" * ^%S<  Z "     /* 0x80..0x8F */
" `'\"\".--~ s>  zY"    /* 0x90..0x9F */
"  cL Y|S\"Ca ~  -"     /* 0xA0..0xAF */
"o 23'uP.,1o     "      /* 0xB0..0xBF */
"AAAAAA CEEEEIIII"      /* 0xC0..0xCF */
"DNOOOOOx0UUUUYPB"      /* 0xD0..0xDF */
"aaaaaa ceeeeiiii"      /* 0xE0..0xEF */
"onooooo/0uuuuypy";     /* 0xF0..0xFF */
/* Column after which the next space is turned into a line break. */
enum { WRAP_COLUMN = 70 };

/* Return the multi-character ASCII replacement for byte c, or NULL when c
   has no multi-character replacement.  The byte values appear to follow
   the Windows-1252 code page. */
static const char *multi_char_replacement(int c)
{
    switch (c) {
    case 0x85: return "...";
    /* 0x87 (double dagger), not 0x88: 0x88 is the circumflex, which the
       trans table already maps to '^'. */
    case 0x87: return "**";
    case 0x8C: return "OE";
    case 0x99: return "tm";
    case 0x9C: return "oe";
    case 0xA9: return "(C)";
    case 0xAB: return "<<";
    case 0xAE: return "(R)";
    case 0xB1: return "+-";
    case 0xBB: return ">>";
    case 0xBC: return "1/4";
    case 0xBD: return "1/2";
    case 0xBE: return "3/4";
    case 0xC6: return "AE";
    case 0xE6: return "ae";
    default:   return NULL;
    }
}

/* Filter stdin to stdout:
     - control characters other than tab, newline, vertical tab and form
       feed are dropped (this removes DOS carriage returns);
     - vertical tab and form feed become newlines;
     - bytes 0x7F..0xFF are replaced by ASCII substitutes, either a
       multi-character sequence or a single character from trans[];
     - the first space found after column WRAP_COLUMN becomes a newline.
   Returns 0 on success and 1 if reading stdin or writing stdout failed. */
int main(void)
{
    int c;
    int col = 0;    /* characters written on the current output line */

    while ((c = getchar()) != EOF) {
        const char *seq;

        if (c < ' ' && c != '\t' && c != '\n' && c != '\v' && c != '\f') {
            continue;   /* throw it out */
        }

        /* special multi-char sequences */
        seq = multi_char_replacement(c);
        if (seq != NULL) {
            /* Count every emitted character so the wrap column stays
               accurate (the "..." case used to be left uncounted). */
            for (; *seq != '\0'; seq++) {
                putchar(*seq);
                col++;
            }
            continue;
        }

        if (c == '\v' || c == '\f') {
            c = '\n';
        } else if (c > 0x7E) {
            size_t idx = (size_t)(c - 0x7F);

            /* Never index past the table, even if it is shorter than the
               129 entries needed for 0x7F..0xFF; fall back to a space. */
            c = (idx < sizeof trans - 1) ? trans[idx] : ' ';
        }

        /* line wrap */
        if (col > WRAP_COLUMN && c == ' ') {
            c = '\n';
        }
        putchar(c);
        col++;
        if (c == '\n') {
            col = 0;
        }
    }

    if (ferror(stdin) || fflush(stdout) == EOF) {
        return 1;
    }
    return 0;
}