// LemmatisatingFilter.java

package sk.iway.iwcm.system.fulltext.lucene;

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.util.Version;

import sk.iway.iwcm.Constants;

/**
 * LemmatisatingFilter.java
 * <p>
 * Token filter that rewrites the term text of tokens produced by the wrapped stream.
 * For every token that is not flagged as a keyword, the term is looked up through
 * {@code Lemmas.get} for the configured language. When that lookup returns a value,
 * the term text is replaced by it; if the configuration flag
 * {@code luceneIndexingSkSynonymExpansion} is enabled and {@code Synonyms.get} returns
 * a value for the original term, that value is used instead. Tokens for which
 * {@code Lemmas.get} returns {@code null} are passed through unchanged (the synonym
 * lookup is not consulted for them).
 *
 * @Title webjet7
 * @Company Interway s.r.o. (www.interway.sk)
 * @Copyright Interway s.r.o. (c) 2001-2011
 * @author $Author: jeeff thaber $
 * @version $Revision: 1.3 $
 * @created Date: 15.4.2011 13:45:54
 * @modified $Date: 2004/08/16 06:26:11 $
 */
public class LemmatisatingFilter extends TokenFilter
{
	/** Term text of the current token; overwritten when a replacement is found. */
	private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
	/** Tokens flagged through this attribute are never modified by this filter. */
	private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
	/** Language key passed to the {@code Lemmas} and {@code Synonyms} lookups. */
	private final String language;

	/**
	 * Creates the filter on top of an existing token stream.
	 *
	 * @param matchVersion Lucene compatibility version; not used by this filter
	 * @param input        token stream whose tokens are to be rewritten
	 * @param language     language key passed to the lemma and synonym lookups
	 */
	public LemmatisatingFilter(Version matchVersion, TokenStream input, String language)
	{
		super(input);
		this.language = language;
	}

	/**
	 * Advances the wrapped stream by one token and, unless the token is a keyword,
	 * replaces its term text as described in the class documentation.
	 *
	 * @return {@code true} if a token is available, {@code false} once the wrapped
	 *         stream is exhausted
	 * @throws IOException propagated from the wrapped stream
	 */
	@Override
	public boolean incrementToken() throws IOException
	{
		if (!input.incrementToken())
		{
			return false;
		}
		if (keywordAtt.isKeyword())
		{
			// don't muck with already-keyworded terms
			return true;
		}

		final char[] buffer = termAtt.buffer();
		final int length = termAtt.length();

		char[] replacement = Lemmas.get(language, buffer, 0, length);
		if (replacement == null)
		{
			// no lemma for this term: leave it untouched, synonyms are not consulted
			return true;
		}

		if (Constants.getBoolean("luceneIndexingSkSynonymExpansion"))
		{
			// the synonym lookup is keyed by the original term, not by the lemma
			char[] synonym = Synonyms.get(language, buffer, 0, length);
			if (synonym != null)
			{
				replacement = synonym;
			}
		}

		// bulk copy instead of clearing the term and appending one char at a time
		termAtt.copyBuffer(replacement, 0, replacement.length);
		return true;
	}
}