// HTMLNode.java

/*
 * HTML Parser
 * Copyright (C) 1997 David McNicol
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * file COPYING for more details.
 */

package cvu.html;

import java.util.Arrays;
import java.util.Enumeration;
import java.util.Locale;
import java.util.Vector;

import sk.iway.iwcm.Logger;

/**
 * This class represents a single node within an HTML tree. Each node
 * has a name, zero or more attributes and possibly some content. Nodes
 * can appear within the content of other nodes. <p>
 * End tags do not appear since they only indicate 'end-of-content'. To
 * prevent the system searching for the end of standalone tags, a dynamic
 * list has been implemented. When the HTMLNode class is resolved
 * a setup method is called adding a set of default standalone tags
 * to the list. Standalone tags can then be added and removed dynamically
 * using static method calls. <p>
 * The list is the only way the internal code can tell
 * whether a tag is standalone. If a problem occurs the tree structure
 * would still be sound, but it would not be accurate, so while the form
 * of the HTML would be conserved, searches would not operate correctly.
 * @see HTMLTree
 * @author <a href="http://www.strath.ac.uk/~ras97108/">David McNicol</a>
 */
// The public API deliberately keeps raw Enumeration types so that existing
// callers continue to compile unchanged; hence the class-wide suppression.
@SuppressWarnings({"rawtypes", "unchecked"})
public class HTMLNode
{

	private HTMLNode parent;                // Refers to this node's parent.
	private final String name;              // Stores the name of the HTML node.
	private final AttributeList attr;       // List of element's attributes.
	private final Vector<Object> children;  // Child Strings and HTMLNodes, or null.
	private boolean hidden;                 // True if the node is not to be printed.

	/**
	 * Constructs a new HTMLNode from a start tag, reading the node's
	 * content from the given enumeration of tokens. If the tag's name
	 * is on the standalone list no tokens are consumed and the node
	 * has no children.
	 * @param tag the TagToken representing the start of this node, or
	 * null to create an unnamed node which takes its content from
	 * the enumeration (its name and attribute list are then null).
	 * @param parent the node which will contain the new node, may be null.
	 * @param src enumeration of tokens from which the content is parsed.
	 */
	public HTMLNode (TagToken tag, HTMLNode parent, Enumeration src) {

		// Store the reference to the node's parent.
		this.parent = parent;

		// Set the node to be unhidden by default.
		hidden = false;

		if (tag != null) {

			// Take the name and the attributes from the tag.
			name = tag.getName();
			attr = tag.getAttributes();
		} else {

			// An unnamed node has neither a name nor attributes.
			name = null;
			attr = null;
		}

		// Only named standalone nodes skip parsing of the content.
		// The name must already be set here: parseChildren uses it
		// to recognise the matching end tag.
		if (tag != null && HTMLNode.isStandalone(name))
			children = null;
		else
			children = parseChildren(src);
	}

	/**
	 * Constructs a new, detached HTMLNode with the specified name.
	 * The node gets an empty attribute list and, unless the name is
	 * on the standalone list, an empty list of children.
	 * @param name the name of the new node.
	 */
	public HTMLNode (String name) {

		// Store the name of the node.
		this.name = name;

		// The node will have no parent till it is added to a tree.
		parent = null;

		// Detached nodes start out visible.
		hidden = false;

		// Create a new attribute list.
		attr = new AttributeList();

		// Create space for children if the node is not standalone.
		if (HTMLNode.isStandalone(name))
			children = null;
		else
			children = new Vector<Object>();
	}

	/**
	 * Returns the name of this node.
	 */
	public String getName () {
		return name;
	}

	/**
	 * Returns the node's parent node.
	 */
	public HTMLNode getParent () {
		return parent;
	}

	/**
	 * Returns the node's children (Strings and HTMLNodes), or null
	 * if the node has no list of children.
	 */
	public Enumeration getChildren () {

		// Return nothing if the node has no children.
		if (children == null) return null;

		return children.elements();
	}

	/**
	 * Returns true if the node is currently hidden.
	 */
	public boolean isHidden () {
		return hidden;
	}

	/**
	 * Hides the node.
	 */
	public void hide () {
		hidden = true;
	}

	/**
	 * "Unhides" the node.
	 */
	public void unhide () {
		hidden = false;
	}

	/**
	 * Returns the value of the attribute with the given name.
	 * @param name the name of the attribute.
	 * @return the attribute list's answer, or null if this node has
	 * no attribute list.
	 */
	public String getAttribute (String name) {

		// Check that the attribute list is there.
		if (attr == null) return null;

		// Return the value associated with the attribute name.
		return attr.get(name);
	}

	/**
	 * Returns an attribute with all double quote characters
	 * escaped with a backslash.
	 * @param name the name of the attribute.
	 * @return the quoted value, or null if this node has no
	 * attribute list.
	 */
	public String getQuotedAttribute (String name) {

		// Check that the attribute list is there.
		if (attr == null) return null;

		// Return the quoted version.
		return attr.getQuoted(name);
	}

	/**
	 * Returns a string version of the attribute and its value.
	 * @param name the name of the attribute.
	 * @return the string version, or null if this node has no
	 * attribute list.
	 */
	public String getAttributeToString (String name) {

		// Check that the attribute list is there.
		if (attr == null) return null;

		// Return the string version.
		return attr.toString(name);
	}

	/**
	 * Returns a string version of the HTMLNode. A hidden node
	 * contributes neither its start tag nor its end tag, but its
	 * children are still written; a hidden standalone node therefore
	 * yields an empty string.
	 */
	@Override
	public String toString () {

		StringBuilder sb = new StringBuilder();

		if (! hidden) {

			// Write the opening of the tag and the tag's name.
			sb.append('<').append(name);

			// Write the attributes, if there are any.
			if (attr != null && attr.size() > 0)
				sb.append(' ').append(attr);

			// Finish off the tag.
			sb.append('>');
		}

		// Standalone nodes have neither content nor an end tag.
		if (isStandalone(name)) return sb.toString();

		// Write the string version of every child.
		if (children != null) {

			for (Object child : children)
				sb.append(child);
		}

		if (! hidden) {
			// Write the end tag.
			sb.append("</").append(name).append('>');
		}

		// Return the string version.
		return sb.toString();
	}

	/**
	 * Sets the node's parent to the specified HTMLNode.
	 * @param parent the new parent.
	 */
	public void setParent (HTMLNode parent) {
		this.parent = parent;
	}

	/**
	 * Returns true if an attribute with the given name exists.
	 * @param name the name of the attribute.
	 */
	public boolean isAttribute (String name) {

		// Check that the attribute list is there.
		if (attr == null) return false;

		// Check the table for an attribute with that name.
		return attr.exists(name);
	}

	/**
	 * Adds a new attribute to the node's attribute list with
	 * the specified value. If the attribute already exists the
	 * old value is overwritten. Nothing happens if this node has
	 * no attribute list.
	 * @param name the name of the attribute.
	 * @param value the value of the attribute.
	 */
	public void addAttribute (String name, String value) {

		// Return if the attribute list is not there.
		if (attr == null) return;

		// Otherwise, add the name/value pair to the list.
		attr.set(name, value);
	}

	/**
	 * Adds an object to the end of this node's content. Only
	 * Strings and HTMLNodes are accepted; anything else, including
	 * null, is silently ignored. Nothing is added if this node has
	 * no list of children.
	 * @param child the String or HTMLNode to be added.
	 */
	public void addChild (Object child) {

		// Return if this node cannot hold children.
		if (children == null) return;

		if (child instanceof String) {

			// Text is stored as it is.
			children.addElement(child);

		} else if (child instanceof HTMLNode) {

			// Nodes are stored and told about their new parent.
			children.addElement(child);
			((HTMLNode) child).setParent(this);
		}
	}

	/**
	 * Removes the specified HTMLNode from the current node's
	 * list of children. The parent reference of the removed node
	 * is left untouched.
	 * @param child the node to be removed.
	 */
	public void removeChild (HTMLNode child) {

		// Return if the child is not defined properly
		if (child == null) return;

		// Return if the list of children is not defined properly.
		if (children == null) return;

		// Otherwise, remove the child if it is on the list.
		children.removeElement(child);
	}

	/**
	 * Adds an object to this node's content before
	 * the specified child node. If 'before' is null the child is
	 * appended to the end of the content; if 'before' is not one of
	 * this node's children the child is placed at the beginning.
	 * Only Strings and HTMLNodes are accepted, anything else is
	 * silently ignored.
	 * @param child the object to be added.
	 * @param before the node before which the child will be placed.
	 */
	public void addChildBefore (Object child, HTMLNode before) {

		// Return if the child is not of the right type (a null
		// child fails both instanceof checks).
		if (! ((child instanceof String) ||
		  (child instanceof HTMLNode))) return;

		// Return if this node cannot hold children.
		if (children == null) return;

		// Without a reference node the child goes to the end.
		if (before == null) {

			addChild(child);
			return;
		}

		int idx = children.indexOf(before);

		// Add the child to the beginning if the 'before' node
		// was not found.
		if (idx < 0) idx = 0;

		// Always insert at the index of the 'before' node, which
		// shifts it (and everything after it) one place along. This
		// also holds when 'before' is the last child: appending in
		// that case would place the child after it instead.
		children.insertElementAt(child, idx);

		// If the child is an HTMLNode, set its parent.
		if (child instanceof HTMLNode)
			((HTMLNode) child).setParent(this);
	}

	/**
	 * Removes an attribute with the specified name from the
	 * attribute list.
	 * @param name the name of the attribute to remove.
	 */
	public void removeAttribute (String name) {

		// Return if the attribute list is not there.
		if (attr == null) return;

		// Otherwise, remove the attribute from the list.
		attr.unset(name);
	}

	/**
	 * Returns the node after this one in the parent's
	 * list of children, or null if there is none.
	 */
	public HTMLNode nextSibling () {

		// Return nothing if the node has no parent.
		if (parent == null) return null;

		// Ask the parent to return the node after this one.
		return parent.nextChild(this);
	}

	/**
	 * Returns the node before this one in the parent's
	 * list of children, or null if there is none.
	 */
	public HTMLNode previousSibling () {

		// Return nothing if the node has no parent.
		if (parent == null) return null;

		// Ask the parent to return the node before this one.
		return parent.previousChild(this);
	}

	/**
	 * Returns the first child of this node which is an HTMLNode,
	 * skipping any text, or null if there is no such child.
	 */
	public HTMLNode firstChild () {

		// Return nothing if this node has no children.
		if (children == null) return null;

		for (Object curr : children) {

			// Return the first HTMLNode in the list.
			if (curr instanceof HTMLNode)
				return (HTMLNode) curr;
		}

		// Return nothing if there were no HTMLNodes in the list.
		return null;
	}

	/**
	 * Returns the HTMLNode after the specified one in this
	 * node's content, skipping any text in between.
	 * @param child the HTMLNode before the one we want.
	 * @return the following HTMLNode, or null if the child is not
	 * in the content or no HTMLNode follows it.
	 */
	public HTMLNode nextChild (HTMLNode child) {

		// Return nothing if this node has no children.
		if (children == null) return null;

		boolean found = false; // True once the child has been passed.

		for (Object curr : children) {

			if (found) {

				// Return the next HTMLNode we encounter.
				if (curr instanceof HTMLNode)
					return (HTMLNode) curr;

			} else if (curr == child) {

				// The child is matched by identity.
				found = true;
			}
		}

		return null;
	}

	/**
	 * Returns the HTMLNode before the specified one in this
	 * node's content, skipping any text in between.
	 * @param child the HTMLNode after the one we want.
	 * @return the preceding HTMLNode, or null if the child is not
	 * in the content or no HTMLNode precedes it.
	 */
	public HTMLNode previousChild (HTMLNode child) {

		// Return nothing if this node has no children.
		if (children == null) return null;

		HTMLNode prev = null; // Stores last found HTMLNode.

		for (Object curr : children) {

			// The child is matched by identity; once it is reached
			// the last HTMLNode seen is the answer.
			if (curr == child) return prev;

			// Remember curr if it is an HTMLNode.
			if (curr instanceof HTMLNode)
				prev = (HTMLNode) curr;
		}

		return null;
	}

	/**
	 * Parses the contents of this HTML node from the enumeration
	 * of tokens provided. Text tokens are stored as Strings, start
	 * tags become nested HTMLNodes (which consume their own content
	 * from the same enumeration), and parsing stops at the end tag
	 * matching this node's name or when the tokens run out. End tags
	 * which do not match are ignored.
	 * @param src an enumeration of tokens.
	 * @return the content, or null if no content was found. Note
	 * that a node left with a null list cannot be given children
	 * later on, since addChild and addChildBefore ignore such nodes.
	 */
	private Vector<Object> parseChildren (Enumeration src) {

		// Create a new Vector to store the contents.
		Vector<Object> store = new Vector<Object>();

		// Loop round the enumeration of tokens.
		while (src.hasMoreElements()) {

			// Get the next token from the enumeration.
			Object token = src.nextElement();

			// Simple text is stored as a String.
			if (token instanceof TextToken) {

				store.addElement(((TextToken) token).getText());
				continue;
			}

			// Anything which is not a tag either is skipped.
			if (! (token instanceof TagToken)) continue;

			TagToken tag = (TagToken) token;

			if (tag.isEndTag()) {

				// Stop if the end tag's name matches.
				if (name != null &&
				  name.equals(tag.getName())) break;

				// Otherwise ignore the end tag.
				continue;
			}

			// A start tag becomes a child node of this node.
			store.addElement(new HTMLNode(tag, this, src));
		}

		if (store.size() > 0)
			return store;
		else
			return null;
	}

	/**
	 * Default node names which are standalone.
	 */
	private static final String[] defaultStandaloneList = {
		"area", "base", "basefont", "bgsound", "br",
		"col", "dd", "dl", "dt", "font", "frame",
		"hr", "img", "input", "isindex", "li",
		"link", "meta", "nextid", "option", "overlay", "p",
		"param", "tab", "wbr", "!", "!--"
	};

	// Full list of standalone names, stored in lower case. It is
	// loaded with the defaults when the class is initialized (the
	// default array above must be declared first).
	private static final Vector<String> standaloneList =
		new Vector<String>(Arrays.asList(defaultStandaloneList));

	/**
	 * Utility method which people can use to find out exactly
	 * which nodes are in the default standalone list. The default
	 * list is written to the debug log.
	 */
	public static void printDefaultStandaloneList () {
		Logger.debug(HTMLNode.class, Arrays.toString(defaultStandaloneList));
	}

	/**
	 * Adds the specified string, converted to lower case, to the
	 * standalone list unless it is already there. A null name is
	 * ignored.
	 * @param name the new standalone name.
	 */
	public static void addStandalone (String name) {

		// There is nothing to add without a name.
		if (name == null) return;

		// Convert the String to lower case, independently of the
		// default locale (a Turkish locale would otherwise turn
		// "LI" into a name with a dotless i).
		String lc = name.toLowerCase(Locale.ROOT);

		// Add the lowercase string unless the list has it already.
		if (! standaloneList.contains(lc))
			standaloneList.addElement(lc);
	}

	/**
	 * Removes the specified string, converted to lower case, from
	 * the standalone list. A null name is ignored.
	 * @param name the standalone name to remove.
	 */
	public static void removeStandalone (String name) {

		// There is nothing to remove without a name.
		if (name == null) return;

		// Remove the lowercase string from the list.
		standaloneList.removeElement(name.toLowerCase(Locale.ROOT));
	}

	/**
	 * Checks the standalone list to see if it mentions the specified
	 * tag name and returns true if so. The comparison ignores case,
	 * matching the way names are stored by addStandalone.
	 * @param name the tag name to check against the list.
	 * @return true if the name is on the list, false otherwise or
	 * if the name is null.
	 */
	public static boolean isStandalone (String name) {

		// A missing name is never standalone.
		if (name == null) return false;

		// The list only holds lower case names.
		return standaloneList.contains(name.toLowerCase(Locale.ROOT));
	}
}