// This is the main program (class Test).
import java.util.*;
import java.net.*;
import java.io.*;
import org.w3c.tidy.Tidy;
import com.objectspace.jgl.PriorityQueue;
import org.w3c.dom.*;
import edu.gatech.disl.qr.util.*;
//import edu.gatech.disl.qr.wrapper.base.*;
import edu.gatech.disl.qr.xml.Piece;
public class Test
{
public int extractObjects(Node parsedPage) {
TreeTuple treeTuple = new TreeTuple(null);
TreeTuple rootTuple = new TreeTuple(null);
if (results == null) {
results = new Vector();
}
Node subtree = getSubtree(parsedPage, treeTuple, rootTuple, getSubtreePath(), getSubtreeRule());
finalSubtree = subtree;
TagHeuristic heuristic = new HighestCountTagHeuristic(null, configuration);
heuristic.process(subtree);
out.println("Tag count: "+heuristic.getTagCount());
fireWrapperBeganExtraction();
int count = printResults(finalTag, heuristic, treeTuple, rootTuple, results);
return count;
}
public static void main(String args[])
{
System.out.println("This is a test!");
Node p = null;
//Node p = new Node(3,3);带参数的形式
//Node p = new Node();//不带参数的形式
System.out.println("There are " +extractObjects(p);
}
}
// This is the code for the TreeTuple class used by the main program.
package edu.gatech.disl.omini;
import java.util.*;
import org.w3c.dom.*;
//import edu.gatech.disl.qr.xml.Piece;
/**
 * Holds one DOM node together with a set of lazily computed, cached metrics
 * about it.
 *
 * Every cached metric starts at -1. A getter whose cached value is negative
 * computes the metric (most of them by delegating to {@code SubtreeType}),
 * stores it and returns it; later calls return the stored value. The setters
 * overwrite the cached value directly, so storing a negative number makes the
 * corresponding getter recompute on its next call.
 *
 * Two tuples are equal when their nodes are equal.
 */
public class TreeTuple {
    private Node node;
    private int fanout = -1;
    private int size = -1;
    private int count = -1;
    private int tagCount = -1;
    private int tagFanout = -1;
    private int childAverage = -1;
    private int highestChildCount = -1;
    private int largestChildSize = -1;
    private int largestChildTagCount = -1;
    private double ordinal = -1;
    private int depth = -1;
    private int volume = -1;
    private int increase = -1;
    private int tagCountIncrease = -1;
    private int volume2 = -1;
    private int volume3 = -1;
    private int linkCount = -1;
    private int contentCount = -1;
    private int textCount = -1;
    private int textContentCount = -1;
    private WrapperConfiguration configuration;

    /**
     * Shared initialisation for the public constructors: the count starts at
     * zero; every other metric keeps its -1 "not computed yet" initial value.
     */
    private TreeTuple()
    {
        count = 0;
    }

    /**
     * Creates a tuple for the given node without a configuration.
     *
     * The getters that delegate to {@code SubtreeType} with a configuration
     * argument will pass {@code null} for it; use
     * {@link #TreeTuple(Node, WrapperConfiguration)} to supply one.
     *
     * @param p the node this tuple describes
     * @throws IllegalArgumentException if {@code p} is {@code null}
     */
    public TreeTuple(Node p) {
        this();
        if (p == null) {
            throw new IllegalArgumentException("subtree can't be null");
        }
        node = p;
    }

    /**
     * Creates a tuple for the given node and configuration.
     *
     * @param p    the node this tuple describes
     * @param conf the configuration handed to {@code SubtreeType} when metrics
     *             are computed
     * @throws IllegalArgumentException if {@code p} or {@code conf} is
     *                                  {@code null}
     */
    public TreeTuple(Node p, WrapperConfiguration conf) {
        this();
        if (p == null) {
            throw new IllegalArgumentException("subtree can't be null");
        }
        if (conf == null) {
            throw new IllegalArgumentException("configuration can't be null");
        }
        node = p;
        configuration = conf;
    }

    /**
     * Compares tuples by their nodes.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is this tuple, or a
     *         {@code TreeTuple} whose node equals this tuple's node (two
     *         {@code null} nodes count as equal)
     */
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof TreeTuple)) {
            return false;
        }
        Node theirs = ((TreeTuple) obj).getNode();
        Node mine = getNode();
        if (theirs == null) {
            // setNode(null) is possible, so avoid dereferencing a null node.
            return mine == null;
        }
        return theirs.equals(mine);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: derived from the node
     * only.
     *
     * @return the node's hash code, or 0 when the node is {@code null}
     */
    public int hashCode() {
        Node mine = getNode();
        return (mine == null) ? 0 : mine.hashCode();
    }

    /**
     * Returns the size divided by the tag fanout (integer division), or 0 when
     * the tag fanout is 0. Computed on first use and cached.
     *
     * @return the cached or newly computed child average
     */
    public int getChildAverage() {
        if (childAverage < 0) {
            int tags = getTagFanout();
            childAverage = (tags == 0) ? 0 : getSize() / tags;
        }
        return childAverage;
    }

    /**
     * Returns {@code SubtreeType.countContentChars(node)}, computed on first
     * use and cached.
     *
     * @return the cached or newly computed content count
     */
    public int getContentCount() {
        if (contentCount < 0) {
            contentCount = SubtreeType.countContentChars(getNode());
        }
        return contentCount;
    }

    /**
     * Returns the stored count. It is never computed here: it starts at 0 and
     * changes only through {@link #setCount(int)}.
     *
     * @return the stored count
     */
    public int getCount() {
        return count;
    }

    /**
     * Return the depth of this node. Lazily compute it if necessary
     * (via {@code SubtreeType.depth(node, configuration)}).
     * Creation date: (7/17/00 10:45:39 AM)
     * Author: David Buttler
     * Revisions: <date> <description>
     * @return int
     */
    public int getDepth() {
        if (depth < 0) {
            depth = SubtreeType.depth(getNode(), configuration);
        }
        return depth;
    }

    /**
     * Returns {@code SubtreeType.fanout(node, configuration)}, computed on
     * first use and cached.
     *
     * @return the cached or newly computed fanout
     */
    public int getFanout() {
        if (fanout < 0) {
            fanout = SubtreeType.fanout(getNode(), configuration);
        }
        return fanout;
    }

    /**
     * Returns {@code SubtreeType.highestChildCount(node, configuration)},
     * computed on first use and cached.
     *
     * @return the cached or newly computed highest child count
     */
    public int getHighestChildCount() {
        if (highestChildCount < 0) {
            highestChildCount = SubtreeType.highestChildCount(getNode(), configuration);
        }
        return highestChildCount;
    }

    /**
     * Returns the size minus the child average, or 0 when the fanout is 0.
     * Computed on first use and cached.
     *
     * @return the cached or newly computed increase
     */
    public int getIncrease() {
        if (increase < 0) {
            increase = (getFanout() == 0) ? 0 : getSize() - getChildAverage();
        }
        return increase;
    }

    /**
     * Returns {@code SubtreeType.getLargestChildSize(node, configuration)}, or
     * 0 when the fanout is 0. Computed on first use and cached.
     *
     * @return the cached or newly computed largest child size
     */
    public int getLargestChildSize() {
        if (largestChildSize < 0) {
            largestChildSize = (getFanout() == 0)
                    ? 0
                    : SubtreeType.getLargestChildSize(getNode(), configuration);
        }
        return largestChildSize;
    }

    /**
     * Returns {@code SubtreeType.getLargestChildTagCount(node, configuration)},
     * or 0 when the fanout is 0. Computed on first use and cached.
     *
     * @return the cached or newly computed largest child tag count
     */
    public int getLargestChildTagCount() {
        if (largestChildTagCount < 0) {
            largestChildTagCount = (getFanout() == 0)
                    ? 0
                    : SubtreeType.getLargestChildTagCount(getNode(), configuration);
        }
        return largestChildTagCount;
    }

    /**
     * Returns {@code SubtreeType.countLinks(node)}, computed on first use and
     * cached.
     *
     * @return the cached or newly computed link count
     */
    public int getLinkCount() {
        if (linkCount < 0) {
            linkCount = SubtreeType.countLinks(getNode());
        }
        return linkCount;
    }

    /**
     * @return the node this tuple describes
     */
    public org.w3c.dom.Node getNode() {
        return node;
    }

    /**
     * Returns the stored ordinal; it is -1 until {@link #setOrdinal(double)}
     * is called.
     *
     * @return the stored ordinal
     */
    public double getOrdinal() {
        return ordinal;
    }

    /**
     * Returns {@code SubtreeType.subtreeSize(node, configuration)}, computed
     * on first use and cached.
     *
     * @return the cached or newly computed size
     */
    public int getSize() {
        if (size < 0) {
            size = SubtreeType.subtreeSize(getNode(), configuration);
        }
        return size;
    }

    /**
     * Returns {@code SubtreeType.tagCount(node, configuration)}, computed on
     * first use and cached.
     *
     * @return the cached or newly computed tag count
     */
    public int getTagCount() {
        if (tagCount < 0) {
            tagCount = SubtreeType.tagCount(getNode(), configuration);
        }
        return tagCount;
    }

    /**
     * Returns the tag count minus the tag count divided by the tag fanout
     * (integer division), or 0 when the fanout or the tag fanout is 0.
     * Computed on first use and cached.
     *
     * @return the cached or newly computed tag count increase
     */
    public int getTagCountIncrease() {
        if (tagCountIncrease < 0) {
            int tags = getTagFanout();
            // The divisor is the tag fanout, so it has to be checked as well
            // as the fanout; otherwise a zero tag fanout divides by zero.
            if (getFanout() == 0 || tags == 0) {
                tagCountIncrease = 0;
            }
            else {
                tagCountIncrease = getTagCount() - getTagCount() / tags;
            }
        }
        return tagCountIncrease;
    }

    /**
     * Returns {@code SubtreeType.tagFanout(node, configuration)}, computed on
     * first use and cached.
     *
     * @return the cached or newly computed tag fanout
     */
    public int getTagFanout() {
        if (tagFanout < 0) {
            tagFanout = SubtreeType.tagFanout(getNode(), configuration);
        }
        return tagFanout;
    }

    /**
     * Returns {@code SubtreeType.countContentNodes(node)}, computed on first
     * use and cached.
     *
     * @return the cached or newly computed text content count
     */
    public int getTextContentCount() {
        if (textContentCount < 0) {
            textContentCount = SubtreeType.countContentNodes(getNode());
        }
        return textContentCount;
    }

    /**
     * Returns {@code SubtreeType.countTextNode(node)}, computed on first use
     * and cached.
     *
     * @return the cached or newly computed text count
     */
    public int getTextCount() {
        if (textCount < 0) {
            textCount = SubtreeType.countTextNode(getNode());
        }
        return textCount;
    }

    /**
     * Returns tag fanout times increase, computed on first use and cached.
     *
     * @return the cached or newly computed volume
     */
    public int getVolume() {
        if (volume < 0) {
            volume = getTagFanout() * getIncrease();
        }
        return volume;
    }

    /**
     * Returns tag fanout times tag count increase, computed on first use and
     * cached.
     *
     * @return the cached or newly computed second volume
     */
    public int getVolume2() {
        if (volume2 < 0) {
            volume2 = getTagFanout() * getTagCountIncrease();
        }
        return volume2;
    }

    /**
     * Returns tag fanout times tag count increase times increase, computed on
     * first use and cached.
     *
     * @return the cached or newly computed third volume
     */
    public int getVolume3() {
        if (volume3 < 0) {
            volume3 = getTagFanout() * getTagCountIncrease() * getIncrease();
        }
        return volume3;
    }

    /** Overwrites the cached child average. */
    protected void setChildAverage(int newChildAverage) {
        childAverage = newChildAverage;
    }

    /** Overwrites the cached content count. */
    protected void setContentCount(int newContentCount) {
        contentCount = newContentCount;
    }

    /** Overwrites the stored count. */
    protected void setCount(int newCount) {
        count = newCount;
    }

    /** Overwrites the cached depth. */
    protected void setDepth(int newDepth) {
        depth = newDepth;
    }

    /** Overwrites the cached fanout. */
    protected void setFanout(int newFanout) {
        fanout = newFanout;
    }

    /** Overwrites the cached highest child count. */
    protected void setHighestChildCount(int newHighestChildCount) {
        highestChildCount = newHighestChildCount;
    }

    /** Overwrites the cached increase. */
    protected void setIncrease(int newIncrease) {
        increase = newIncrease;
    }

    /** Overwrites the cached largest child size. */
    protected void setLargestChildSize(int newLargestChildSize) {
        largestChildSize = newLargestChildSize;
    }

    /** Overwrites the cached link count. */
    protected void setLinkCount(int newLinkCount) {
        linkCount = newLinkCount;
    }

    /** Replaces the node; previously cached metrics are left untouched. */
    protected void setNode(org.w3c.dom.Node newNode) {
        node = newNode;
    }

    /** Sets the ordinal. */
    public void setOrdinal(double newOrdinal) {
        ordinal = newOrdinal;
    }

    /** Overwrites the cached size. */
    protected void setSize(int newSize) {
        size = newSize;
    }

    /** Overwrites the cached tag count. */
    protected void setTagCount(int newTagCount) {
        tagCount = newTagCount;
    }

    /** Overwrites the cached tag count increase. */
    protected void setTagCountIncrease(int newTagCountIncrease) {
        tagCountIncrease = newTagCountIncrease;
    }

    /** Overwrites the cached text count. */
    protected void setTextCount(int newLetterCount) {
        textCount = newLetterCount;
    }

    /** Overwrites the cached volume. */
    protected void setVolume(int newVolume) {
        volume = newVolume;
    }

    /** Overwrites the cached second volume. */
    protected void setVolume2(int newVolume2) {
        volume2 = newVolume2;
    }

    /** Overwrites the cached third volume. */
    protected void setVolume3(int newVolume3) {
        volume3 = newVolume3;
    }

    /**
     * Formats the tuple as
     * <code>{nodeName, ordinal, fanout, size, count, childAverage}</code>.
     * The raw cached fields are printed, so metrics that have not been
     * computed yet appear as -1; a missing node prints as "null".
     *
     * @return the formatted tuple
     */
    public String toString() {
        StringBuffer buf = new StringBuffer();
        buf.append("{");
        buf.append(node != null ? node.getNodeName() : "null");
        buf.append(", ").append(ordinal);
        buf.append(", ").append(fanout);
        buf.append(", ").append(size);
        buf.append(", ").append(count);
        buf.append(", ").append(childAverage);
        buf.append("}");
        return buf.toString();
    }
}