#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * by w.j.hengeveld  nov 1997
 *    itsme@xs4all.nl
 *
 * simple html parser+checker
 *
 */

/*
   <NAME ..... >
	<!.....>
	</NAME>
*/
/*
 * Run-time switches shared by the whole program.  process_options()
 * updates them from the command line; process() reads print_text.
 * debug and print_html are stored but not read by any code visible
 * in this file.
 */
struct globals {
   int debug;        /* set to 1 by the 'd' option letter */
   int print_text;   /* set to 1 by the 't' option letter: echo text found outside tags */
   int print_html;   /* cleared to 0 by the 'h' option letter */
};

/* Defaults: no debug, no text echo, html printing enabled. */
struct globals gl = {
   .debug      = 0,
   .print_text = 0,
   .print_html = 1
};

/*
 * Classification of the markup process() is currently collecting.
 * The numeric values double as indices into typename[] below, so the
 * order of the enumerators must not change independently of the table.
 */
enum tag_type {
   TAG_NONE = 0,        /* not collecting a tag */
   TAG_UNKNOWN,         /* '<' seen, kind not decided yet */
   TAG_DECL,            /* entered on a letter while TAG_MAYBE_DECL */
   TAG_MAYBE_DECL,      /* entered on '!' while TAG_UNKNOWN */
   TAG_COMMENT,         /* entered on '-' while TAG_MAYBE_COMMENT */
   TAG_MAYBE_COMMENT,   /* entered on '-' while TAG_MAYBE_DECL */
   TAG_START,           /* entered on a letter while TAG_UNKNOWN */
   TAG_END              /* entered on '/' while TAG_UNKNOWN */
};

/* Three-letter label printed in front of each reported tag, indexed by enum tag_type. */
char *typename[]={
   [TAG_NONE]          = "NON",
   [TAG_UNKNOWN]       = "UNK",
   [TAG_DECL]          = "DEC",
   [TAG_MAYBE_DECL]    = "DE?",
   [TAG_COMMENT]       = "CMT",
   [TAG_MAYBE_COMMENT] = "CM?",
   [TAG_START]         = "TAG",
   [TAG_END]           = "END"
};

/*
 * Progress towards the "-->" terminator while process() is inside a
 * TAG_COMMENT.  Each '-' advances INSIDE -> END1 -> END2 (further
 * dashes stay in END2); any other character falls back to INSIDE,
 * except that '>' in END2 closes the comment.
 */
enum comment_state {
   CMT_NOT    = 0,   /* not inside a comment */
   CMT_INSIDE = 1,   /* inside the comment body, no trailing '-' pending */
   CMT_END1   = 2,   /* one trailing '-' seen */
   CMT_END2   = 3    /* two or more trailing '-' seen; '>' now ends the comment */
};

/* Maximum number of tag characters process() stores (its buffer adds one byte for the NUL); extra characters are dropped. */
#define MAX_TAG_STORAGE 4096

/*
 * Emit the common prefix of a report line: a newline to terminate any
 * text currently being echoed (text mode only, and only when the last
 * echoed character was not a line break), followed by "name(line): "
 * when a file name is known.
 */
static void print_report_prefix(const char *name, int linenr, int crlf_count)
{
   if (gl.print_text && crlf_count==0)
      putchar('\n');
   if (name)
      printf("%s(%d): ", name, linenr);
}

/*
 * Scan an HTML stream character by character and report, on stdout,
 * every tag found plus a few syntax errors.
 *
 *   f     stream to read; it is consumed until getc() returns EOF
 *         (end of file or read error) and is not closed here.
 *   name  label used in the "name(line): " prefix of each report;
 *         pass NULL to omit the prefix.
 *
 * Each completed tag is printed as "<label>: <tag text>" where the
 * label comes from typename[].  Newlines inside a tag are stored as
 * spaces, and at most MAX_TAG_STORAGE characters of a tag are kept.
 * When gl.print_text is set, characters outside tags are echoed too,
 * with every run of '\n'/'\r' collapsed to a single '\n'.
 */
void process(FILE *f, char *name)
{
   char tag[MAX_TAG_STORAGE+1];
   int  tag_idx=0;
   int  c;
   int  linenr=1;
   int  crlf_count=0;
   enum tag_type tag_type=TAG_NONE;
   enum comment_state cmt_stat=CMT_NOT;

   tag[0]=0;

   /*
    * Test the value returned by getc() rather than feof(): feof() only
    * becomes true after a read has already failed, so testing it first
    * would process the EOF sentinel as data, and would spin forever on
    * a read error.
    */
   while ((c=getc(f))!=EOF)
   {
      /* While inside a tag, accumulate its text (line breaks flattened). */
      if (tag_type!=TAG_NONE)
      {
         if (tag_idx<MAX_TAG_STORAGE)
            tag[tag_idx++]=(char)((c=='\n') ? ' ' : c);
         tag[tag_idx]=0;
      }

      if (tag_type==TAG_COMMENT)
      {
         /* Inside a comment only the "-->" terminator is significant. */
         if (c=='-')
         {
            if (cmt_stat==CMT_INSIDE)
               cmt_stat=CMT_END1;
            else if (cmt_stat==CMT_END1)
               cmt_stat=CMT_END2;
            else if (cmt_stat!=CMT_END2)
               cmt_stat=CMT_INSIDE;
         }
         else if (c=='>' && cmt_stat==CMT_END2)
         {
            cmt_stat=CMT_NOT;
            print_report_prefix(name, linenr, crlf_count);
            printf("%s: %s\n", typename[tag_type], tag);
            tag_idx=0;
            tag_type=TAG_NONE;
         }
         else
            cmt_stat=CMT_INSIDE;
      }
      else if (c=='<')
      {
         if (tag_type!=TAG_NONE)
         {
            print_report_prefix(name, linenr, crlf_count);
            printf("Error: < inside tag: %s\n", tag);
            /* Restart collection; the tag kind is left as it was. */
            tag_idx=0;
         }
         else
         {
            tag_type=TAG_UNKNOWN;
            tag_idx=0;
            tag[tag_idx++]=(char)c;  /* add first char to tag */
            tag[tag_idx]=0;          /* keep the buffer a valid string at all times */
         }
      }
      else if (c=='>')
      {
         print_report_prefix(name, linenr, crlf_count);
         if (tag_type==TAG_UNKNOWN)
            printf("Error: unexpected >: %s\n", tag);
         else if (tag_type==TAG_NONE)
            printf("Error: unexpected >\n");
         else
            printf("%s: %s\n", typename[tag_type], tag);
         tag_idx=0;
         tag_type=TAG_NONE;
      }
      else if (c=='!')
      {
         if (tag_type==TAG_UNKNOWN)
            tag_type=TAG_MAYBE_DECL;
      }
      else if (c=='/')
      {
         if (tag_type==TAG_UNKNOWN)
            tag_type=TAG_END;
      }
      else if (c=='-')
      {
         if (tag_type==TAG_MAYBE_DECL)
            tag_type=TAG_MAYBE_COMMENT;
         else if (tag_type==TAG_MAYBE_COMMENT)
         {
            tag_type=TAG_COMMENT;
            cmt_stat=CMT_INSIDE;
         }
      }
      else if (isalpha(c))
      {
         if (tag_type==TAG_UNKNOWN)
            tag_type=TAG_START;
         else if (tag_type==TAG_MAYBE_DECL)
            tag_type=TAG_DECL;
      }
      else if (tag_type==TAG_UNKNOWN)
      {
         /* '<' followed by something that cannot start a tag: drop it. */
         tag_type=TAG_NONE;
         tag_idx=0;
      }

      /*
       * Text echo.  crlf_count is the length of the current run of line
       * breaks: the first one is echoed as '\n', the rest are swallowed.
       * NOTE(review): tag_type has already been reset when a tag closes,
       * so the closing '>' itself is echoed here -- confirm that this is
       * intended.
       */
      if (tag_type==TAG_NONE && gl.print_text)
      {
         if (c=='\n' || c=='\r')
            crlf_count++;
         else
            crlf_count=0;

         if (crlf_count==1)
            putchar('\n');
         else if (crlf_count==0)
            putchar(c);
      }

      if (c=='\n')
         linenr++;
   }
}

/*
 * Interpret argv[0] as a bundle of single-letter options ("-dth").
 *
 *   argv  points at the argument to examine; only argv[0] is read.
 *   argc  number of arguments available at argv (not consulted).
 *   gl    settings structure updated in place.
 *
 * Recognised letters: 'd' sets debug, 't' sets print_text, 'h' clears
 * print_html.  Any other letter is skipped without a diagnostic.
 *
 * Returns the number of arguments consumed: 1 when argv[0] starts
 * with '-', 0 otherwise (in which case gl is left untouched).
 */
int process_options(char **argv, int argc, struct globals *gl)
{
   const char *opt=argv[0];

   (void)argc;

   if (opt[0]!='-')
      return 0;

   for (opt++ ; *opt!='\0' ; opt++)
   {
      if (*opt=='d')
         gl->debug=1;
      else if (*opt=='t')
         gl->print_text=1;
      else if (*opt=='h')
         gl->print_html=0;
   }

   return 1;
}

/*
 * Entry point.  Arguments are handled left to right: one starting
 * with '-' is passed to process_options(), anything else is opened
 * as a file and scanned with process().  Options therefore affect
 * only the files that follow them.
 *
 * When no file operand was given at all -- including the case where
 * only options were supplied -- standard input is scanned instead.
 * A file that cannot be opened is reported with perror() and skipped.
 */
int main(int argc, char **argv)
{
   int i=1;
   int files_named=0;   /* file operands seen, whether or not they opened */

   while (i<argc)
   {
      char *fn;
      FILE *f;

      if (argv[i][0]=='-')
      {
         i+=process_options(&argv[i], argc-i, &gl);
         continue;
      }

      fn=argv[i++];
      files_named++;

      f=fopen(fn, "r");
      if (f==NULL)
      {
         perror(fn);
         continue;
      }

      /* argc also counts option arguments, so one file plus options gets a banner too. */
      if (argc>2)
         printf("--------%s--------\n", fn);
      process(f, fn);
      fclose(f);
   }

   /* Fall back to stdin based on operands seen, not on argc alone. */
   if (files_named==0)
      process(stdin, NULL);
   return 0;
}

