Html2Text.java
package sk.iway;
import java.util.List;
import java.util.ArrayList;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
/**
* Converts HTML to plain text
*
*@Title magma-web
*@Company Interway s.r.o. (www.interway.sk)
*@Copyright Interway s.r.o. (c) 2001-2002
*@author $Author: jeeff $
*@version $Revision: 1.5 $
*@created Friday, 2003, November 28
*@modified $Date: 2005/10/25 06:48:04 $
*/
public class Html2Text
{
    /** Jsoup document parsed from the HTML given to the constructor; assigned once. */
    private final Document doc;

    /**
     * Parses the given HTML so that its text can be extracted later.
     *
     * @param html HTML source to parse; {@code null} is treated as an empty string
     */
    public Html2Text(String html)
    {
        // Guard against null so that instance usage is as tolerant as html2text(String),
        // which already accepts null input.
        doc = Jsoup.parse(html == null ? "" : html);
    }

    /**
     * Converts HTML to plain text.
     *
     * @param html HTML source, may be {@code null}
     * @return text of the parsed HTML, or an empty string when {@code html} is {@code null}
     */
    public static String html2text(String html)
    {
        if (html == null)
        {
            // Nothing to parse; skip building a document.
            return "";
        }
        return new Html2Text(html).getText();
    }

    /**
     * Returns the plain text of the parsed HTML.
     *
     * @return the result of Jsoup's {@code Document.text()} for the parsed document
     */
    public String getText()
    {
        return doc.text();
    }

    /**
     * Returns the texts of the elements matched by a selector (eg {@code h1,h2}).
     *
     * @param selector Jsoup selector query; it is passed to {@code Document.select} unchanged
     *                 (NOTE(review): a null or malformed selector is presumably rejected by
     *                 Jsoup with an unchecked exception - confirm against the Jsoup version in use)
     * @return a new list holding the texts reported by {@code Elements.eachText()} for the match
     */
    public List<String> getTextByElement(String selector)
    {
        Elements tags = doc.select(selector);
        // eachText() already returns the texts as a list; wrap it in a fresh ArrayList
        // so the caller gets its own mutable list.
        return new ArrayList<>(tags.eachText());
    }
}