There is no need to explain why someone would need Lucene.
There are also plenty of samples on the net; I am just posting our sample in case
anyone comes across this page.
import java.io.File ;
import java.io.Reader ;
import java.io.Serializable ;
import java.io.StringReader ;
import java.util.HashMap ;
import java.util.List ;
import org.apache.lucene.analysis.Analyzer ;
import org.apache.lucene.analysis.standard.StandardAnalyzer ;
import org.apache.lucene.document.Document ;
import org.apache.lucene.document.Field ;
import org.apache.lucene.index.CorruptIndexException ;
import org.apache.lucene.index.IndexReader ;
import org.apache.lucene.index.IndexWriter ;
import org.apache.lucene.index.IndexWriter.MaxFieldLength ;
import org.apache.lucene.index.Term ;
import org.apache.lucene.queryParser.QueryParser ;
import org.apache.lucene.search.IndexSearcher ;
import org.apache.lucene.search.Query ;
import org.apache.lucene.search.ScoreDoc ;
import org.apache.lucene.search.Searcher ;
import org.apache.lucene.search.TermQuery ;
import org.apache.lucene.search.TopDocs ;
import org.apache.lucene.search.similar.MoreLikeThis ;
import org.apache.lucene.search.spell.Dictionary ;
import org.apache.lucene.search.spell.LuceneDictionary ;
import org.apache.lucene.search.spell.SpellChecker ;
import org.apache.lucene.store.Directory ;
import org.apache.lucene.store.FSDirectory ;
import org.apache.lucene.util.Version ;
public class YLuceneTester
{
private final String indexDir = "D:\\indexDir" ;
private final String spellDirPath = "D:\\spellDir" ;
/**
* create index
*/
public boolean createIndex( ) throws Exception
{
// if( true == ifIndexExist( ) )
// {
// return true ;
// }
// File dir = new File(dataDir);
// if(!dir.exists()){
// return false;
// }
//File[] htmls = dir.listFiles();
Directory fsDirectory = FSDirectory.open( new File( this.indexDir ) ) ;
Analyzer analyzer = new StandardAnalyzer( Version.LUCENE_33 ) ;
IndexWriter indexWriter = new IndexWriter( fsDirectory, analyzer, true, MaxFieldLength.UNLIMITED ) ;
addDocument( indexWriter ) ;
indexWriter.optimize( ) ;
indexWriter.close( ) ;
IndexReader indexReader = null ;
try
{
indexReader = IndexReader.open( fsDirectory ) ;
Dictionary dictionary = new LuceneDictionary( indexReader, "trans" ) ;
FSDirectory spellDir = FSDirectory.open( new File( this.spellDirPath ) ) ;
SpellChecker spellChecker = new SpellChecker( spellDir ) ;
spellChecker.indexDictionary( dictionary ) ;
spellChecker.close( );
}
finally
{
if( indexReader != null )
{
indexReader.close( ) ;
}
}
return true ;
}
/**
* Add one document to the Lucene index
* @throws Exception
* @throws CorruptIndexException
*/
public void addDocumentDB( IndexWriter indexWriter ) throws CorruptIndexException, Exception
{
YOrganization org = YOrganization.getTopLevelOrganization( "TXG100" ) ;
String hql = " Select y.trans,y.id FROM YEntityTranslation y where y.organization.id = " + org.getId( ) ;
HashMap parameters = new HashMap( ) ;
List existingCatalogData = ( List )HibernateUtils.execHQL( hql, parameters, 0, 1000 ) ; for( Object[ ] datas : existingCatalogData ) { String trans = ( String )datas[ 0 ] ; if( YClientUtils.isBlankTrim( trans ) ) continue ; System.err.println( trans ) ; Document document = new Document( ) ; //document.add( new Field( "path", path, Field.Store.YES, Field.Index.NO ) ) ; document.add( new Field( "trans", trans, Field.Store.YES, Field.Index.ANALYZED ) ) ; indexWriter.addDocument( document ) ; } } public void addDocument( IndexWriter indexWriter ) throws CorruptIndexException, Exception { String[] items = new String[]{"African lion","African wild cat","African wild dog","dog","cat","lion"}; for( String item : items ) { Document document = new Document( ) ; document.add( new Field( "trans", item, Field.Store.YES, Field.Index.ANALYZED ) ) ; indexWriter.addDocument( document ) ; } } public Query suggest( String queryString ,int distance) throws Exception { try { Directory fsDirectory = FSDirectory.open( new File( this.spellDirPath ) ) ; SpellChecker spellChecker = new SpellChecker( fsDirectory ) ; if( spellChecker.exist( queryString ) ) { return null ; } String[ ] similarWords = spellChecker.suggestSimilar( queryString, distance ) ; if( similarWords.length == 0 ) { return null ; } System.err.println( " Term = " + queryString + " Suggestions :" ) ; for( String similarWord : similarWords ) { System.err.println( " ) " + similarWord ) ; } return new TermQuery( new Term( "trans", similarWords[ 0 ] ) ) ; } catch( Exception e ) { throw new Exception( e.getMessage( ) ) ; } } public void searchIndex( String[ ] queryStrings ) throws Exception { Searcher searcher = new IndexSearcher( FSDirectory.open( new File( this.indexDir ) ) ) ; QueryParser parser = new QueryParser( Version.LUCENE_CURRENT, "trans", new StandardAnalyzer( Version.LUCENE_CURRENT ) ) ; for( String queryString : queryStrings ) { System.out.println( "nsearching for: " + queryString ) ; Query query = parser.parse( 
queryString ) ; TopDocs results = searcher.search( query, 10 ) ; System.out.println( "total hits: " + results.totalHits ) ; ScoreDoc[ ] hits = results.scoreDocs ; for( ScoreDoc hit : hits ) { Document doc = searcher.doc( hit.doc ) ; System.out.printf( "%5.3f %sn \n", hit.score, doc.get( "trans" ) ) ; } } searcher.close( ) ; } /** * judge if the index exists already */ public boolean ifIndexExist( ) { File directory = new File( this.indexDir ) ; if( 0 < directory.listFiles( ).length )
{
return true ;
}
else
{
return false ;
}
}
public String getIndexDir( )
{
return this.indexDir ;
}
public Query parse( String queryString ) throws Exception
{
QueryParser queryParser = new QueryParser( Version.LUCENE_CURRENT, "trans", new StandardAnalyzer( Version.LUCENE_CURRENT ) ) ;
queryParser.setDefaultOperator( QueryParser.AND_OPERATOR ) ;
return queryParser.parse( queryString ) ;
}
public void search( String queryString ,int distance ) throws Exception
{
long startTime = System.currentTimeMillis( ) ;
IndexSearcher is = null ;
FSDirectory spellDir = FSDirectory.open( new File( this.spellDirPath ) ) ;
Directory fsDirectory = FSDirectory.open( new File( this.indexDir ) ) ;
int minimumHits = 100 ;
int minimumScore = 5 ;
try
{
is = new IndexSearcher( fsDirectory ) ;
Query query = parse( queryString ) ;
TopDocs tdocs = is.search( query, 100 ) ;
//Hits hits = is.search( query ) ;
// for( ScoreDoc sdoc : tdocs.scoreDocs )
// {
// sdoc.
// }
String suggestedQueryString = null ;
if( tdocs.totalHits < minimumHits || tdocs.getMaxScore( ) < minimumScore )
{
Query didYouMean = suggest( queryString ,distance) ;
if( didYouMean != null )
{
suggestedQueryString = didYouMean.toString( "trans" ) ;
}
}
long endTime = System.currentTimeMillis( ) ;
//return new SearchResult( extractHits( hits ), hits.length( ), endTime - startTime, queryString, suggestedQueryString ) ;
}
finally
{
if( is != null )
{
is.close( ) ;
}
}
}
public void moreLikeThis( String text ) throws Exception
{
Directory fsDirectory = FSDirectory.open( new File( this.indexDir ) ) ;
IndexReader indexReader = IndexReader.open( fsDirectory ) ;
// FuzzyLikeThisQuer flt = new FuzzyLikeThisQuery( 50, new StandardAnalyzer( ) ) ;
// flt.addTerms( "product critical update", "title", 0.75f, FuzzyQuery.defaultPrefixLength ) ;
// BooleanQuery q = ( BooleanQuery )flt.rewrite( r ) ;
// int minNumClauseMatches = Math.round( q.clauses( ).size( ) * 0.5f ) ;
// q.setMinimumNumberShouldMatch( minNumClauseMatches ) ;
IndexSearcher is = new IndexSearcher( FSDirectory.open( new File( this.indexDir ) ) ) ;
MoreLikeThis mlt = new MoreLikeThis( indexReader ) ;
mlt.setFieldNames( new String[ ] { "trans" } ) ;
mlt.setMinWordLen( 2 ) ;
mlt.setBoost( true ) ;
Reader reader = new StringReader( text ) ;
//Create the query that we can then use to search the index
Query query = mlt.like( reader ) ;
//Search the index using the query and get the top 5 results
TopDocs topDocs = is.search( query, 5 ) ;
//Create an array to hold the quotes we are going to
//pass back to the client
for( ScoreDoc scoreDoc : topDocs.scoreDocs )
{
//This retrieves the actual Document from the index using
//the document number. (scoreDoc.doc is an int that is the
System.err.print( "--" + scoreDoc.toString( ) ) ;
}
is.close( );
}
public static void init()
{
YLuceneTester luceneTester = new YLuceneTester( ) ;
try
{
luceneTester.createIndex( ) ;
//luceneTester.searchIndex( new String[ ] { "Cleaner" } ) ;
//TermQuery q = ( TermQuery )luceneTester.suggest( "Claner" ) ;
//q.extractTerms( terms )
//luceneTester.moreLikeThis( "Clean" ) ;
// luceneTester.search( "Cleaner" ) ;
// luceneTester.search( "Cordless " ) ;
//
// luceneTester.suggest( "Cleane" ) ;
// luceneTester.suggest( "Clean" ) ;
// luceneTester.suggest( "Clnr" ) ;
}
catch( Exception e )
{
e.printStackTrace( ) ;
}
}
public static void tests()
{
YLuceneTester luceneTester = new YLuceneTester( ) ;
try
{
// luceneTester.searchIndex( new String[ ] { "Afri" } ) ;
// luceneTester.searchIndex( new String[ ] { "African" } ) ;
// luceneTester.searchIndex( new String[ ] { "Africax" } ) ;
// TermQuery q = ( TermQuery )luceneTester.suggest( "Claner" ) ;
// q.extractTerms( terms )
//luceneTester.moreLikeThis( "dog" ) ;
luceneTester.search( "Afrieen" ,1 ) ;
luceneTester.search( "Afrieen" ,1 ) ;
luceneTester.search( "Afrieen" ,1 ) ;
// luceneTester.search( "Cordless " ) ;
//
luceneTester.suggest( "Afrieen",2 ) ;
luceneTester.suggest( "lion" ,2) ;
// luceneTester.suggest( "Clnr" ) ;
}
catch( Exception e )
{
e.printStackTrace( ) ;
}
}
public static void main( String[ ] args )
{
//HibernateUtils._configFileName = "hibernate.hqltest.xml" ;
//init( );
tests( );
}
}
Nice blog about punchout XML; it was great to read this.
ReplyDeleteCXML Punchout
Thanks for sharing this article about the Benefits of OCI Punchout, Open Catalog Interface Punchout.
ReplyDeleteBenefits of OCI Punchout