#include <stdio.h>
#include <stdlib.h>
#include <strings.h>

/*
 * Read an HTML file on stdin and write it to stdout with every HTML tag
 * replaced by a single space, except for a few special tags that are
 * turned into a newline or a tab, or that switch preformatted mode on/off.
 */

#define PRE (-1)
#define endPRE (-2)

/*
 * Classify one complete HTML tag.
 *
 * s: the tag text including the surrounding '<' and '>', NUL-terminated.
 *    The comparison ignores case but is otherwise exact, so a tag that
 *    carries attributes is not recognised.
 *
 * Returns the character to print in place of the tag ('\n', '\t' or ' '),
 * or PRE / endPRE when the tag opens / closes a preformatted section.
 * Any tag that is not in the table yields ' '.
 */
int checkTag(const char *s)
{
    /*
     * NOTE(review): the tag names in this table were missing from the file
     * as received (the literals were empty or held a bare newline, so they
     * could never match and did not compile). Only the return values
     * survived; the names below are a reconstruction -- confirm them
     * against the original source.
     */
    static const struct {
        const char *tag;
        int replacement;
    } known[] = {
        { "<TR>",   '\n'   },
        { "<TD>",   '\t'   },
        { "<TH>",   '\t'   },
        { "<BR>",   '\n'   },
        { "<PRE>",  PRE    },
        { "</PRE>", endPRE },
    };
    size_t i;

    for (i = 0; i < sizeof known / sizeof known[0]; i++) {
        if (strcasecmp(s, known[i].tag) == 0)
            return known[i].replacement;
    }
    return ' ';
}

/* Print msg followed by a newline on stderr and exit with status 1. */
void error(const char *msg)
{
    fprintf(stderr, "%s\n", msg);
    exit(1);
}

/*
 * Print one character of document text.
 *
 * Outside a preformatted section a run of spaces is collapsed to a single
 * space; *blank records whether the last thing printed was such a space.
 */
static void put_text(int c, int pre, int *blank)
{
    if (c == ' ' && !pre) {
        if (!*blank)
            putchar(c);
        *blank = 1;
    } else {
        putchar(c);
        *blank = 0;
    }
}

/*
 * Filter stdin to stdout, one character at a time.
 *
 * Characters between '<' and '>' are collected in a growable buffer and
 * handed to checkTag() when the tag closes; everything else is printed
 * through put_text(). Newlines are dropped unless a preformatted section
 * is active.
 */
int main(int argc, char *argv[])
{
    size_t buff_max = 1024;   /* allocated size of buffer */
    size_t buff_used = 0;     /* characters of the current tag stored */
    char *buffer;
    int in_tag = 0;           /* between '<' and the matching '>' */
    int blank = 0;            /* last output was a collapsible space */
    int pre = 0;              /* inside a preformatted section */
    int c;

    (void)argc;
    (void)argv;

    buffer = malloc(buff_max);
    if (buffer == NULL)
        error("can't allocate memory for line buffer");

    /* read the file */
    while ((c = getc(stdin)) != EOF) {
        if (c == '\n') {
            /* end of line is ignored unless we are in a PRE section */
            if (pre)
                putchar(c);
            continue;
        }

        if (c == '<') {
            /* begin of tag; a '<' inside a tag restarts the collection */
            buffer[0] = (char)c;
            buff_used = 1;
            in_tag = 1;
            continue;
        }

        if (c == '>' && in_tag) {
            int action;

            /* end of tag: the growth check below always leaves 2 bytes */
            buffer[buff_used] = (char)c;
            buffer[buff_used + 1] = '\0';
            in_tag = 0;

            action = checkTag(buffer);
            switch (action) {
            case PRE:
                pre = 1;
                break;
            case endPRE:
                pre = 0;
                break;
            case ' ':
                if (!blank)
                    putchar(action);
                blank = 1;
                break;
            default:
                blank = 0;
                putchar(action);
                break;
            }
            continue;
        }

        if (!in_tag) {
            /*
             * Ordinary text. A '>' that does not close a tag also lands
             * here and is printed literally; previously it wrote through
             * an unset pointer or replayed the preceding tag.
             */
            put_text(c, pre, &blank);
            continue;
        }

        /* inside a tag: keep room for this character, '>' and the NUL */
        if (buff_used + 3 > buff_max) {
            char *grown;

            buff_max += 1024;
            grown = realloc(buffer, buff_max);
            if (grown == NULL) {
                free(buffer);
                error("can't realloc line buffer");
            }
            buffer = grown;
        }
        buffer[buff_used++] = (char)c;
    }

    free(buffer);
    return 0;
}