PoiExtractor.java
package sk.iway.iwcm.findexer;
import java.io.InputStream;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.extractor.POITextExtractor;
import sk.iway.iwcm.io.IwcmInputStream;
/**
* Extracts all text from Word documents (falls back to RTF parsing when that fails)
*
*@Title WebJET
*@Company Interway s.r.o. (www.interway.sk)
*@Copyright Interway s.r.o. (c) 2001-2002
*@author $Author: jeeff $
*@version $Revision: 1.3 $
*@created Wednesday, 2004, January 21
*@modified $Date: 2010/01/20 08:45:42 $
*/
public class PoiExtractor
{
	/**
	 * Extracts the text of the document stored under the given file name.
	 * <p>
	 * The file is first handed to the POI based extractor
	 * ({@link #getText(InputStream)}). When that yields no result
	 * ({@code null}), the file is re-opened and parsed as RTF, because a
	 * file with a Word extension may actually contain RTF data.
	 *
	 *@param fileName name of the file, passed as-is to {@code IwcmInputStream}
	 *@return the extracted text, or {@code null} when the file could not be
	 *         opened or neither extraction attempt succeeded
	 */
	public static String getText(String fileName)
	{
		String result = null;

		IwcmInputStream is = openStream(fileName);
		if (is != null)
		{
			// getText(InputStream) takes ownership of the stream and always closes it
			result = getText(is);
		}

		// when the data could not be read as a POI document, try to read it as RTF
		if (result == null)
		{
			is = openStream(fileName);
			if (is != null)
			{
				try
				{
					result = Rtf.getText(is);
				}
				catch (Exception ex)
				{
					sk.iway.iwcm.Logger.error(ex);
				}
				finally
				{
					// close even when the RTF parser throws, otherwise the stream leaks
					closeQuietly(is);
				}
			}
		}
		return result;
	}

	/**
	 * Extracts the text of a document read from the given stream using the
	 * extractor chosen by POI's {@code ExtractorFactory}.
	 * <p>
	 * The stream is always closed by this method, regardless of whether the
	 * extraction succeeded.
	 *
	 * @param is stream with the document data; may be {@code null}
	 * @return the extracted text; an empty string when {@code is} is
	 *         {@code null} (or the extractor produced no text); {@code null}
	 *         when the extraction failed with an exception (the exception is
	 *         logged)
	 */
	public static String getText(InputStream is)
	{
		if (is == null)
		{
			return "";
		}

		try
		{
			// NOTE(review): the extractor itself is never closed here; in POI versions
			// where POITextExtractor is Closeable it may be worth closing it too - confirm
			POITextExtractor extractor = ExtractorFactory.createExtractor(is);
			String text = extractor.getText();
			// guard against a null text so callers never receive the literal "null"
			return text != null ? text : "";
		}
		catch (Exception ex)
		{
			sk.iway.iwcm.Logger.error(ex);
			return null;
		}
		finally
		{
			// closing in finally: a failed extraction must not leak the stream and
			// a failed close must not discard text that was already extracted
			closeQuietly(is);
		}
	}

	/**
	 * Opens the file as an {@code IwcmInputStream}; a failure is logged.
	 *
	 * @param fileName name of the file to open
	 * @return the opened stream, or {@code null} when opening failed
	 */
	private static IwcmInputStream openStream(String fileName)
	{
		try
		{
			return new IwcmInputStream(fileName);
		}
		catch (Exception ex)
		{
			sk.iway.iwcm.Logger.error(ex);
			return null;
		}
	}

	/**
	 * Closes the stream, logging (instead of propagating) any failure.
	 *
	 * @param is stream to close; {@code null} is ignored
	 */
	private static void closeQuietly(InputStream is)
	{
		if (is == null)
		{
			return;
		}
		try
		{
			is.close();
		}
		catch (Exception ex)
		{
			sk.iway.iwcm.Logger.error(ex);
		}
	}
}