// FulltextSearch.java
package sk.iway.iwcm.system.fulltext;
import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import io.github.duckasteroid.cdb.CdbMake;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import sk.iway.iwcm.Cache;
import sk.iway.iwcm.Constants;
import sk.iway.iwcm.DB;
import sk.iway.iwcm.Logger;
import sk.iway.iwcm.Tools;
import sk.iway.iwcm.common.AdminTools;
import sk.iway.iwcm.database.ComplexQuery;
import sk.iway.iwcm.database.Mapper;
import sk.iway.iwcm.database.SimpleQuery;
import sk.iway.iwcm.system.fulltext.indexed.Documents;
import sk.iway.iwcm.system.fulltext.indexed.Forums;
import sk.iway.iwcm.system.fulltext.indexed.Indexed;
import sk.iway.iwcm.system.fulltext.lucene.AnalyzerFactory;
import sk.iway.iwcm.system.fulltext.lucene.LuceneUtils;
/**
* FulltextSearch.java
*
* @Title webjet7
* @Company Interway s.r.o. (www.interway.sk)
* @Copyright Interway s.r.o. (c) 2001-2011
* @author $Author: jeeff thaber $
* @version $Revision: 1.3 $
* @created Date: 6.4.2011 17:56:22
* @modified $Date: 2004/08/16 06:26:11 $
*/
public class FulltextSearch
{
/**
 * Spell checkers keyed by the language string passed to
 * {@link #updateSpellCheck(String)}, which adds the entries;
 * {@link #suggestSimilar(String, String)} reads them. No visible code in this
 * class removes an entry once it has been added.
 */
private static Map<String, SpellChecker> documentsSpellingDictionary = new Hashtable<>();
/**
 * Reports a progress message to the optional HTML log writer and to the debug logger.
 *
 * @param c   class passed on to {@code Logger.debug}
 * @param msg message text
 * @param log writer that receives the timestamped message followed by {@code <br/>};
 *            when {@code null} only the debug logger is used
 */
public static void log(Class<?> c, String msg, Writer log)
{
    if (log != null)
    {
        try
        {
            // The line is assembled inside the try so that any failure while
            // formatting is handled the same way as a failed write.
            final String htmlLine = Tools.formatDateTimeSeconds(Tools.getNow()) + " " + msg + "<br/>";
            log.write(htmlLine);
            log.flush();
        }
        catch (Exception writeFailure)
        {
            // A broken log writer must not stop the caller; record it and carry on.
            Logger.error(writeFailure);
        }
    }
    Logger.debug(c, msg);
}
/**
 * Package-private, parameterless notification hook.
 * NOTE(review): nothing in this class refers to it - {@code index(Indexed, Writer)}
 * registers an {@code Indexed.Callback} instead; confirm whether other classes in
 * the package still need this type.
 */
interface Callback
{
    /** The single notification method; takes no arguments and returns nothing. */
    void callback();
}
/**
 * Runs a full reindex of the default sources without an HTML log writer.
 * Equivalent to calling {@code index(null, null)}.
 */
public static void index()
{
    final Indexed allDefaultSources = null;
    final Writer noHtmlLog = null;
    index(allDefaultSources, noHtmlLog);
}
/**
 * Rebuilds the Lucene index for one source, or for all default sources.
 * <p>
 * When {@code indexed} is {@code null} the default sources ({@link Documents} for
 * the default language and {@link Forums}) are indexed. The index of each language
 * is recreated ({@code OpenMode.CREATE}) the first time it is opened during the run;
 * a later source that reports the same language appends to it, so it cannot erase
 * what an earlier source has just written.
 * <p>
 * When {@code indexed} is given, only that source is processed: the index is opened
 * with {@code OpenMode.CREATE_OR_APPEND} and all documents whose {@code type} term
 * equals {@code indexed.name()} are deleted before indexing starts.
 * <p>
 * Failures are logged (and written to {@code log}) rather than thrown. If the thread
 * is interrupted while waiting for the source to finish, the interrupt flag is
 * restored before the method returns.
 *
 * @param indexed source to reindex, or {@code null} for a full reindex of the defaults
 * @param log     optional writer receiving HTML progress messages; may be {@code null}
 */
@SuppressWarnings({"deprecation","unchecked"})
public static void index(Indexed indexed, Writer log)
{
    List<Indexed> indexeds = new ArrayList<>();
    if (indexed != null)
    {
        indexeds.add(indexed);
    }
    else
    {
        indexeds.add(new Documents(AdminTools.defaultLanguage()));
        //indexeds.add(new Tickets()); JEEFF: disabled for now, not tested
        indexeds.add(new Forums());
    }
    // Languages whose index has already been recreated during this run.
    Set<String> recreatedLanguages = new HashSet<>();
    IndexWriter writer = null;
    boolean interrupted = false;
    try
    {
        for (Indexed i : indexeds)
        {
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31, AnalyzerFactory.getAnalyzer(Version.LUCENE_31,i.language()));
            // add() returns false when the language was already recreated in this run;
            // using CREATE again would wipe the documents indexed by the earlier source.
            if (indexed == null && recreatedLanguages.add(i.language()))
            {
                log(FulltextSearch.class, "Creating index", log);
                /*reindexing the whole index*/
                config.setOpenMode(OpenMode.CREATE);
            }
            else
            {
                log(FulltextSearch.class, "Appending index", log);
                /*partial reindex, or another source sharing an already recreated index*/
                config.setOpenMode(OpenMode.CREATE_OR_APPEND);
            }
            config.setRAMBufferSizeMB(64.0);
            writer = new IndexWriter(getIndexDirectory(i.language()), config);
            if (indexed != null)
            {
                log(FulltextSearch.class, "Deleting index data, type="+indexed.name(), log);
                writer.deleteDocuments(new Term("type",indexed.name()));
                writer.commit();
            }
            ComplexQuery query = new ComplexQuery().setSql(i.sql());
            query.setStreamingResultSet(true);
            int count = i.numberOfDocuments();
            // The callback registered below counts this latch down; await() returns
            // only after it has been invoked "count" times.
            // NOTE(review): if the source invokes the callback fewer times than
            // numberOfDocuments() reported, await() blocks forever - confirm the
            // source guarantees one call per counted document.
            final CountDownLatch latch = new CountDownLatch(count);
            i.setCallback(new Indexed.Callback()
            {
                @Override
                public void call()
                {
                    Logger.debug(FulltextSearch.class, "count down call");
                    latch.countDown();
                }
            });
            log(FulltextSearch.class, "Indexing " + count + " documents.", log);
            query.list(i.mapper(writer, log));
            latch.await();
            writer.commit();
            log(FulltextSearch.class, "Optimizing index.", log);
            writer.optimize();
            log(FulltextSearch.class, "Closing index.", log);
            writer.close();
            writer = null;
            i.close();
        }
    }
    catch (InterruptedException e)
    {
        // Remember the interrupt; the flag is restored only after the writer is
        // closed so that closing is not disturbed by a pending interrupt.
        interrupted = true;
        Logger.error(e);
        log(FulltextSearch.class, "ERROR: "+e.getMessage(), log);
    }
    catch (Exception e)
    {
        Logger.error(e);
        log(FulltextSearch.class, "ERROR: "+e.getMessage(), log);
    }
    finally
    {
        try
        {
            // Non-null only when an iteration failed before its own close().
            if (writer != null)
            {
                writer.close();
            }
        }
        catch (IOException e)
        {
            Logger.error(e);
        }
        finally
        {
            if (interrupted)
            {
                Thread.currentThread().interrupt();
            }
        }
    }
}
/**
 * Returns the Lucene directory that holds the index of the given language.
 * <p>
 * The directory object is looked up in {@link Cache} under the key
 * {@code "FulltextSearch.getIndexDirectory." + language}. On a miss the folder
 * {@code language} below {@code LuceneUtils.LUCENE_INDEX} is created if it does not
 * exist, wrapped in a {@link SimpleFSDirectory} and stored in the cache with the
 * value {@code 5} as third argument (presumably an expiry in minutes - confirm
 * against {@code Cache.setObject}).
 *
 * @param language language whose index directory is requested
 * @return the cached or newly opened directory
 * @throws IOException if the directory cannot be opened
 */
public static synchronized Directory getIndexDirectory(String language) throws IOException
{
    final String cacheKey = "FulltextSearch.getIndexDirectory." + language;
    SimpleFSDirectory cached = (SimpleFSDirectory) Cache.getInstance().getObject(cacheKey);
    if (cached != null)
    {
        return cached;
    }

    File languageFolder = new File(LuceneUtils.LUCENE_INDEX, language);
    if (!languageFolder.exists())
    {
        languageFolder.mkdirs();
    }
    SimpleFSDirectory opened = new SimpleFSDirectory(languageFolder);
    Cache.getInstance().setObject(cacheKey, opened, 5);
    return opened;
}
/**
 * Builds the spell checker for a language on first use.
 * <p>
 * Does nothing unless the {@code luceneUpdateSpellCheck} constant is true and no
 * spell checker is registered for {@code language} yet. Otherwise a
 * {@link SpellChecker} is created on the language's index directory, registered,
 * and fed with the terms of the {@code data} field of that index. The reader used
 * to enumerate the terms is closed once the dictionary has been indexed.
 * I/O errors are logged, not thrown.
 *
 * @param language language whose spell checker should be prepared
 */
public static synchronized void updateSpellCheck(String language)
{
    if (!Constants.getBoolean("luceneUpdateSpellCheck") || documentsSpellingDictionary.containsKey(language))
    {
        return;
    }
    try
    {
        Directory indexDirectory = FulltextSearch.getIndexDirectory(language);
        SpellChecker spellChecker = new SpellChecker(indexDirectory);
        // Registered before the dictionary is built, so a failed build is not
        // retried on the next call (unchanged from the previous behavior).
        documentsSpellingDictionary.put(language, spellChecker);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31, AnalyzerFactory.getAnalyzer(Version.LUCENE_31, language));
        IndexReader reader = IndexReader.open(indexDirectory);
        try
        {
            spellChecker.indexDictionary(new LuceneDictionary(reader, "data"), config, false);
        }
        finally
        {
            // The reader is only needed while the dictionary is being indexed.
            reader.close();
        }
    }
    catch (IOException e)
    {
        Logger.error(e);
    }
}
/**
 * Returns the closest similar word for the given text.
 * <p>
 * Makes sure the spell checker of the language is prepared via
 * {@link #updateSpellCheck(String)} and then asks it for a single suggestion.
 *
 * @param textToFind text to find a similar word for
 * @param language   language whose spell checker is used
 * @return the suggestions returned by the spell checker, or {@code null} when no
 *         spell checker is registered for the language or an I/O error occurred
 */
public static synchronized String[] suggestSimilar(String textToFind, String language)
{
    try
    {
        updateSpellCheck(language);
        SpellChecker checker = documentsSpellingDictionary.get(language);
        return checker == null ? null : checker.suggestSimilar(textToFind, 1);
    }
    catch (IOException lookupFailure)
    {
        Logger.error(lookupFailure);
        return null;
    }
}
/**
 * Returns the set of stopwords for a language.
 * <p>
 * The set is served from {@link Cache} under the key
 * {@code "FulltextSearch.stopwords." + language}; on a miss the words are read from
 * the {@code stopword} table and stored in the cache with the value {@code 5} as
 * third argument (presumably an expiry in minutes - confirm against
 * {@code Cache.setObject}).
 *
 * @param language language of the requested stopwords
 * @return the stopwords of the language
 */
@SuppressWarnings("unchecked")
public static synchronized Set<String> stopwords(String language)
{
    final String cacheKey = "FulltextSearch.stopwords." + language;
    Set<String> words = (Set<String>) Cache.getInstance().getObject(cacheKey);
    if (words == null)
    {
        List<String> rows = new SimpleQuery().forList("select word from stopword where language = ? ", language);
        Set<String> loaded = new HashSet<>(rows);
        Cache.getInstance().setObject(cacheKey, loaded, 5);
        words = loaded;
    }
    return words;
}
/**
 * Creates the lemma CDB file of a language from the lemmas stored in the database.
 * <p>
 * Every {@code form}/{@code lemma} row of the {@code lemma} table for the language
 * is passed through {@code DB.internationalToEnglish} and stored as a key/value
 * pair in {@code lemmas/<language>.cdb} below {@code LuceneUtils.LUCENE_INDEX}.
 * A row that fails is logged and skipped; an {@link IOException} while starting or
 * finishing the file is logged, not thrown.
 * NOTE(review): the bytes are produced with the platform default charset - confirm
 * that the code reading the CDB file uses the same encoding.
 *
 * @param language language whose lemmas are exported
 */
public static void indexLemmas(String language)
{
    final CdbMake lemmaDb = new CdbMake();
    final String targetPath = LuceneUtils.LUCENE_INDEX + File.separatorChar + "lemmas" + File.separatorChar + language + ".cdb";
    try
    {
        lemmaDb.start(new File(targetPath));

        Mapper<Void> lemmaWriter = new Mapper<Void>()
        {
            /** Number of rows stored so far; drives the progress message. */
            private int processed;

            @Override
            public Void map(ResultSet row) throws SQLException
            {
                try
                {
                    byte[] formBytes = DB.internationalToEnglish(row.getString("form")).getBytes();
                    byte[] lemmaBytes = DB.internationalToEnglish(row.getString("lemma")).getBytes();
                    lemmaDb.add(formBytes, lemmaBytes);
                    // The message is emitted before the counter is advanced.
                    if (processed % 1000 == 0)
                    {
                        Logger.println(FulltextSearch.class, "Indexed " + processed + " lemmas.");
                    }
                    processed++;
                }
                catch (Exception rowFailure)
                {
                    // Best effort: one bad row must not abort the whole export.
                    Logger.error(rowFailure);
                }
                return null;
            }
        };

        new ComplexQuery().setSql("select form,lemma from lemma where language = ?").setParams(language)
            .list(lemmaWriter);
        lemmaDb.finish();
    }
    catch (IOException ioFailure)
    {
        Logger.error(ioFailure);
    }
}
/**
 * Creates the synonym CDB file of a language from the UTF-8 thesaurus text file.
 * <p>
 * Reads {@code synonyms/thesarus_<language>.txt} below
 * {@code LuceneUtils.LUCENE_INDEX} line by line. Every line is split on {@code ';'};
 * each entry of the line, the first one included, is stored as a key whose value is
 * the first entry of that line. The result is written to
 * {@code synonyms/<language>.cdb}. Lines that split into no entries at all (for
 * example a line consisting only of {@code ';'}) are skipped. I/O errors are
 * logged, not thrown.
 * NOTE(review): keys and values are encoded with the platform default charset
 * although the input is read as UTF-8 - confirm that the code reading the CDB file
 * uses the same encoding.
 *
 * @param language language whose thesaurus is exported
 */
public static void indexSynonyms(String language)
{
    final CdbMake cdb = new CdbMake();
    String synonymsDir = LuceneUtils.LUCENE_INDEX + File.separatorChar + "synonyms" + File.separatorChar;
    File thesaurus = new File(synonymsDir + "thesarus_" + language + ".txt");
    File target = new File(synonymsDir + language + ".cdb");
    // The input is opened before the CDB file is started, so a missing thesaurus
    // does not leave a started-but-never-finished CDB behind. try-with-resources
    // closes the scanner on every exit path.
    try (Scanner scanner = new Scanner(thesaurus, "UTF-8"))
    {
        cdb.start(target);
        int count = 0;
        while (scanner.hasNext())
        {
            String[] synonyms = scanner.nextLine().split(";");
            // split() drops trailing empty strings, so a line made only of ';'
            // yields an empty array; indexing [0] would throw.
            if (synonyms.length > 0)
            {
                byte[] base = synonyms[0].getBytes();
                for (String synonym : synonyms)
                {
                    cdb.add(synonym.getBytes(), base);
                }
            }
            if (count % 1000 == 0)
            {
                Logger.println(FulltextSearch.class, "Indexed " + count + " synonyms.");
            }
            count++;
        }
        cdb.finish();
    }
    catch (IOException e1)
    {
        Logger.error(e1);
    }
}
}