import au.id.jericho.lib.html.*;
import java.util.*;
import java.io.*;
import java.net.*;
public class FindSpecificTags {
public static void main(String[] args) throws Exception {
String sourceUrlString="data/test.html";
if (args.length==0)
System.err.println("Using default argument of \""+sourceUrlString+'"');
else
sourceUrlString=args[0];
if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString;
MasonTagTypes.register();
Source source=new Source(new URL(sourceUrlString));
System.out.println("\n*******************************************************************************\n");
System.out.println("XML Declarations:");
displaySegments(source.findAllTags(StartTagType.XML_DECLARATION));
System.out.println("XML Processing instructions:");
displaySegments(source.findAllTags(StartTagType.XML_PROCESSING_INSTRUCTION));
PHPTagTypes.register(); // register PHPTagTypes after searching for XML processing instructions, otherwise PHP short tags override them.
StartTagType.XML_DECLARATION.deregister(); // deregister XML declarations so they are recognised as PHP short tags, consistent with the real PHP parser.
source=new Source(source); // have to create a new Source object after changing tag type registrations otherwise cache might contain tags found with previous configuration.
System.out.println("##################### PHP tag types now added to register #####################\n");
System.out.println("H2 Elements:");
displaySegments(source.findAllElements(Tag.H2));
System.out.println("Document Type Declarations:");
displaySegments(source.findAllTags(StartTagType.DOCTYPE_DECLARATION));
System.out.println("CDATA sections:");
displaySegments(source.findAllTags(StartTagType.CDATA_SECTION));
System.out.println("Common server tags: (eg ASP, JSP, PSP, ASP-style PHP or Mason substitution tag)");
displaySegments(source.findAllTags(StartTagType.SERVER_COMMON));
System.out.println("Tags starting with <%=");
displaySegments(source.findAllStartTags("%="));
System.out.println("Tags starting with <%=var");
displaySegments(source.findAllStartTags("%=var"));
System.out.println("HTML Comments:");
displaySegments(source.findAllTags(StartTagType.COMMENT));
System.out.println("Elements in namespace \"o\" (generated by MS-Word):");
displaySegments(source.findAllElements("o:"));
System.out.println("Tags starting with