Wednesday, August 19, 2015

Lucene Tester


There is no need to explain why someone would need Lucene.
There are also plenty of samples on the net; I am just posting our sample in case anyone
comes across this page.


import java.io.File ;
import java.io.Reader ;
import java.io.Serializable ;
import java.io.StringReader ;
import java.util.HashMap ;
import java.util.List ;

import org.apache.lucene.analysis.Analyzer ;
import org.apache.lucene.analysis.standard.StandardAnalyzer ;
import org.apache.lucene.document.Document ;
import org.apache.lucene.document.Field ;
import org.apache.lucene.index.CorruptIndexException ;
import org.apache.lucene.index.IndexReader ;
import org.apache.lucene.index.IndexWriter ;
import org.apache.lucene.index.IndexWriter.MaxFieldLength ;
import org.apache.lucene.index.Term ;
import org.apache.lucene.queryParser.QueryParser ;
import org.apache.lucene.search.IndexSearcher ;
import org.apache.lucene.search.Query ;
import org.apache.lucene.search.ScoreDoc ;
import org.apache.lucene.search.Searcher ;
import org.apache.lucene.search.TermQuery ;
import org.apache.lucene.search.TopDocs ;
import org.apache.lucene.search.similar.MoreLikeThis ;
import org.apache.lucene.search.spell.Dictionary ;
import org.apache.lucene.search.spell.LuceneDictionary ;
import org.apache.lucene.search.spell.SpellChecker ;
import org.apache.lucene.store.Directory ;
import org.apache.lucene.store.FSDirectory ;
import org.apache.lucene.util.Version ;

public class YLuceneTester
{


    /** Filesystem path of the main Lucene index directory opened by the indexing and search methods. */
    private final String indexDir = "D:\\indexDir" ;

    /** Filesystem path of the directory holding the spell-checker index. */
    private final String spellDirPath = "D:\\spellDir" ;

    /**
     * Rebuilds the main index under {@code indexDir} from the sample documents
     * supplied by {@link #addDocument(IndexWriter)}, then rebuilds the
     * spell-check index under {@code spellDirPath} from the terms of the
     * "trans" field.
     * <p>
     * The writer is opened with {@code create == true}, so an existing main
     * index is replaced. If adding documents or optimizing fails, the writer
     * is rolled back instead of committed.
     *
     * @return always {@code true}; failures are reported by exception
     * @throws Exception if either index cannot be written or read
     */
    public boolean createIndex( ) throws Exception
    {
        Directory fsDirectory = FSDirectory.open( new File( this.indexDir ) ) ;
        try
        {
            Analyzer analyzer = new StandardAnalyzer( Version.LUCENE_33 ) ;
            IndexWriter indexWriter = new IndexWriter( fsDirectory, analyzer, true, MaxFieldLength.UNLIMITED ) ;

            boolean indexed = false ;
            try
            {
                addDocument( indexWriter ) ;
                indexWriter.optimize( ) ;
                indexed = true ;
            }
            finally
            {
                // The writer must always be released, otherwise the index write
                // lock stays held after a failure. Commit only a complete index.
                if( indexed )
                {
                    indexWriter.close( ) ;
                }
                else
                {
                    indexWriter.rollback( ) ;
                }
            }

            rebuildSpellIndex( fsDirectory ) ;
        }
        finally
        {
            fsDirectory.close( ) ;
        }
        return true ;
    }

    /**
     * Feeds every term of the "trans" field of the given index into the
     * spell-check index stored under {@code spellDirPath}.
     */
    private void rebuildSpellIndex( Directory indexDirectory ) throws Exception
    {
        IndexReader indexReader = IndexReader.open( indexDirectory ) ;
        try
        {
            FSDirectory spellDir = FSDirectory.open( new File( this.spellDirPath ) ) ;
            try
            {
                SpellChecker spellChecker = new SpellChecker( spellDir ) ;
                try
                {
                    Dictionary dictionary = new LuceneDictionary( indexReader, "trans" ) ;
                    spellChecker.indexDictionary( dictionary ) ;
                }
                finally
                {
                    spellChecker.close( ) ;
                }
            }
            finally
            {
                spellDir.close( ) ;
            }
        }
        finally
        {
            indexReader.close( ) ;
        }
    }

    /**
     * Add one document to the Lucene index
     * @throws Exception 
     * @throws CorruptIndexException 
     */
    public void addDocumentDB( IndexWriter indexWriter ) throws CorruptIndexException, Exception
    {
        YOrganization org = YOrganization.getTopLevelOrganization( "TXG100" ) ;

        String hql = " Select y.trans,y.id FROM YEntityTranslation y where y.organization.id =  " + org.getId( ) ;

        HashMap parameters = new HashMap( ) ;

        List existingCatalogData = ( List )HibernateUtils.execHQL( hql, parameters, 0, 1000 ) ;          for( Object[ ] datas : existingCatalogData )         {             String trans = ( String )datas[ 0 ] ;             if( YClientUtils.isBlankTrim( trans ) )                 continue ;              System.err.println( trans ) ;             Document document = new Document( ) ;             //document.add( new Field( "path", path, Field.Store.YES, Field.Index.NO ) ) ;             document.add( new Field( "trans", trans, Field.Store.YES, Field.Index.ANALYZED ) ) ;              indexWriter.addDocument( document ) ;         }      }          public void addDocument( IndexWriter indexWriter ) throws CorruptIndexException, Exception     {         String[] items = new String[]{"African lion","African wild cat","African wild dog","dog","cat","lion"};                  for( String item : items )         {              Document document = new Document( ) ;             document.add( new Field( "trans", item, Field.Store.YES, Field.Index.ANALYZED ) ) ;              indexWriter.addDocument( document ) ;         }      }      public Query suggest( String queryString ,int distance) throws Exception     {         try         {             Directory fsDirectory = FSDirectory.open( new File( this.spellDirPath ) ) ;             SpellChecker spellChecker = new SpellChecker( fsDirectory ) ;             if( spellChecker.exist( queryString ) )             {                 return null ;             }             String[ ] similarWords = spellChecker.suggestSimilar( queryString, distance ) ;             if( similarWords.length == 0 )             {                 return null ;             }              System.err.println( " Term = " + queryString + " Suggestions :" ) ;             for( String similarWord : similarWords )             {                 System.err.println( " ) " + similarWord ) ;             }              return new TermQuery( new Term( "trans", similarWords[ 0 ] ) ) ;     
    }         catch( Exception e )         {             throw new Exception( e.getMessage( ) ) ;         }     }      public void searchIndex( String[ ] queryStrings ) throws Exception     {         Searcher searcher = new IndexSearcher( FSDirectory.open( new File( this.indexDir ) ) ) ;         QueryParser parser = new QueryParser( Version.LUCENE_CURRENT, "trans", new StandardAnalyzer( Version.LUCENE_CURRENT ) ) ;         for( String queryString : queryStrings )         {             System.out.println( "nsearching for: " + queryString ) ;             Query query = parser.parse( queryString ) ;             TopDocs results = searcher.search( query, 10 ) ;             System.out.println( "total hits: " + results.totalHits ) ;             ScoreDoc[ ] hits = results.scoreDocs ;             for( ScoreDoc hit : hits )             {                 Document doc = searcher.doc( hit.doc ) ;                 System.out.printf( "%5.3f %sn \n", hit.score, doc.get( "trans" ) ) ;             }         }         searcher.close( ) ;     }       /**      * judge if the index exists already      */     public boolean ifIndexExist( )     {         File directory = new File( this.indexDir ) ;         if( 0 < directory.listFiles( ).length )
        {
            return true ;
        }
        else
        {
            return false ;
        }
    }

    /**
     * Returns the filesystem path of the main index directory.
     *
     * @return the configured index directory path
     */
    public String getIndexDir( )
    {
        return indexDir ;
    }


    /**
     * Parses a user query against the field "trans", requiring all terms to
     * match (default operator AND).
     *
     * @param queryString query expression in Lucene query-parser syntax
     * @return the parsed query
     * @throws Exception if the expression cannot be parsed
     */
    public Query parse( String queryString ) throws Exception
    {
        // Use the same fixed Lucene version the index is built with, rather than
        // the deprecated LUCENE_CURRENT, so query-time and index-time analysis
        // stay in step when the Lucene jar is upgraded.
        Analyzer analyzer = new StandardAnalyzer( Version.LUCENE_33 ) ;
        QueryParser queryParser = new QueryParser( Version.LUCENE_33, "trans", analyzer ) ;
        queryParser.setDefaultOperator( QueryParser.AND_OPERATOR ) ;
        return queryParser.parse( queryString ) ;
    }

    /**
     * Searches the main index for the query and, when the result looks weak,
     * asks {@link #suggest(String, int)} for spelling suggestions.
     * <p>
     * A result counts as weak when it has fewer than 100 total hits or a
     * maximum score below 5. The search result itself is not returned or
     * printed; the only visible output is what {@code suggest} writes to
     * {@code System.err}.
     *
     * @param queryString query expression, parsed by {@link #parse(String)}
     * @param distance forwarded unchanged to {@code suggest}
     * @throws Exception if the index cannot be opened, or parsing, searching
     *         or suggesting fails
     */
    public void search( String queryString ,int distance ) throws Exception
    {
        final int minimumHits = 100 ;
        final int minimumScore = 5 ;

        Directory fsDirectory = FSDirectory.open( new File( this.indexDir ) ) ;
        try
        {
            IndexSearcher is = new IndexSearcher( fsDirectory ) ;
            try
            {
                Query query = parse( queryString ) ;
                TopDocs tdocs = is.search( query, 100 ) ;

                if( tdocs.totalHits < minimumHits || tdocs.getMaxScore( ) < minimumScore )
                {
                    // The returned query is not used here; suggest() reports
                    // its findings on System.err.
                    suggest( queryString, distance ) ;
                }
            }
            finally
            {
                is.close( ) ;
            }
        }
        finally
        {
            // The searcher does not close the directory it was given.
            fsDirectory.close( ) ;
        }
    }

    /**
     * Finds documents whose "trans" field is similar to the given text and
     * prints the top 5 {@link ScoreDoc} entries to {@code System.err}, each
     * prefixed with "--".
     *
     * @param text sample text the similarity query is built from
     * @throws Exception if the index cannot be opened or searched
     */
    public void moreLikeThis( String text ) throws Exception
    {
        Directory fsDirectory = FSDirectory.open( new File( this.indexDir ) ) ;
        try
        {
            IndexReader indexReader = IndexReader.open( fsDirectory ) ;
            try
            {
                // Search through the reader that MoreLikeThis uses, instead of
                // opening a second directory and reader for the searcher.
                IndexSearcher is = new IndexSearcher( indexReader ) ;
                try
                {
                    MoreLikeThis mlt = new MoreLikeThis( indexReader ) ;
                    mlt.setFieldNames( new String[ ] { "trans" } ) ;
                    mlt.setMinWordLen( 2 ) ;
                    mlt.setBoost( true ) ;

                    // Build the similarity query from the sample text.
                    Reader reader = new StringReader( text ) ;
                    Query query = mlt.like( reader ) ;

                    // Only the five best matches are reported.
                    TopDocs topDocs = is.search( query, 5 ) ;
                    for( ScoreDoc scoreDoc : topDocs.scoreDocs )
                    {
                        System.err.print( "--" + scoreDoc.toString( ) ) ;
                    }
                }
                finally
                {
                    is.close( ) ;
                }
            }
            finally
            {
                // A searcher built on an existing reader leaves closing it to us.
                indexReader.close( ) ;
            }
        }
        finally
        {
            fsDirectory.close( ) ;
        }
    }
    
    /**
     * Builds the indexes by calling {@code createIndex()} on a fresh tester
     * instance. Any exception is caught and its stack trace printed; nothing
     * is rethrown.
     */
    public static void init()
    {
        YLuceneTester tester = new YLuceneTester( ) ;
        try
        {
            tester.createIndex( ) ;
        }
        catch( Exception failure )
        {
            failure.printStackTrace( ) ;
        }
    }
    
    /**
     * Exercises the tester by hand: searches three times for the misspelled
     * word "Afrieen", then asks for suggestions for "Afrieen" and "lion".
     * The first exception aborts the remaining calls and has its stack trace
     * printed; nothing is rethrown.
     */
    public static void tests()
    {
        YLuceneTester tester = new YLuceneTester( ) ;
        try
        {
            // Same search repeated three times in a row.
            for( int run = 0 ; run < 3 ; run++ )
            {
                tester.search( "Afrieen", 1 ) ;
            }

            tester.suggest( "Afrieen", 2 ) ;
            tester.suggest( "lion", 2 ) ;
        }
        catch( Exception failure )
        {
            failure.printStackTrace( ) ;
        }
    }

    /**
     * Command-line entry point: runs {@code tests()}. {@code init()} is not
     * called from here, so it has to be invoked separately to (re)build the
     * indexes.
     *
     * @param args ignored
     */
    public static void main( String[ ] args )
    {
        YLuceneTester.tests( ) ;
    }
}

2 comments:

  1. Nice blog about punchout XML, it's being great to read this.
    CXML Punchout


    ReplyDelete
  2. Thanks for sharing article about Benefits of OCI Punchout, Open Catalog Interface Punchout
    Benefits of OCI Punchout

    ReplyDelete