In my case I need to search for keywords like C#, .NET, C++, etc. The standard analyzer strips out special characters, so I used the whitespace analyzer instead, but it still doesn't work for me. Indexing:
/// <summary>
/// Writes one Lucene document per row of the first table in <paramref name="ds"/>
/// to the index at the hard-coded location, indexing only the "Skill_Summary"
/// and "Title" columns. Tokens are produced by <c>WhitespaceAnalyzer</c>, so
/// keywords such as "C#" or "C++" keep their special characters (and their case).
/// </summary>
/// <param name="ds">Data set whose first table supplies the rows to index.</param>
/// <exception cref="ArgumentNullException"><paramref name="ds"/> is null.</exception>
public void Indexing(DataSet ds)
{
    if (ds == null)
    {
        throw new ArgumentNullException("ds");
    }

    string indexFileLocation = @"D:\Lucene.Net\Data";
    // Second argument is the "create" flag of FSDirectory.GetDirectory.
    Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, true);
    IndexWriter indexWriter = new IndexWriter(dir, new WhitespaceAnalyzer(), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
    try
    {
        // Tables[0] throws on an empty DataSet, so test the count instead of comparing to null.
        if (ds.Tables.Count > 0)
        {
            DataTable dt = ds.Tables[0];
            foreach (DataRow dr in dt.Rows)
            {
                // Create the Document object
                Document doc = new Document();
                foreach (DataColumn dc in dt.Columns)
                {
                    // Use ColumnName directly; DataColumn.ToString() may append the column expression.
                    string columnName = dc.ColumnName;
                    if (columnName.Equals("Skill_Summary") || columnName.Equals("Title"))
                    {
                        doc.Add(new Field(columnName, dr[columnName].ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    }
                }

                // Write the Document to the catalog
                indexWriter.AddDocument(doc);
            }
        }
    }
    finally
    {
        // Always close the writer so the index write lock is released even if a row fails.
        indexWriter.Close();
    }
}
And searching the fields like this:
// Search both indexed fields for every whitespace-separated term, as a prefix query.
string[] searchfields = new string[] { "Skill_Summary", "Title" };
var parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, searchfields, new WhitespaceAnalyzer());
// Wildcard/prefix terms bypass the analyzer and are lowercased by the parser by default,
// so "C#*" would become "c#*" and never match the case-preserved "C#" token that
// WhitespaceAnalyzer wrote to the index. Keep the term's case as typed.
parser.SetLowercaseExpandedTerms(false);
string searchText = "C#";
//Split the search string into separate search terms by word
string[] terms = searchText.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);
foreach (string term in terms)
{
    // Strip any user-supplied '*' and append exactly one so each term is a prefix match.
    finalQuery.Add(parser.Parse(term.Replace("*", "") + "*"), BooleanClause.Occur.MUST);
}
hits = searcher.Search(finalQuery);
How can I build my own analyzer using WhitespaceAnalyzer and a LowerCaseFilter in my case?
How can I build my own analyzer using WhitespaceAnalyzer and a LowerCaseFilter in my case?
/// <summary>
/// Analyzer that tokenizes with <c>WhitespaceTokenizer</c> and then passes every
/// token through <c>LowerCaseFilter</c>, giving whitespace-delimited,
/// case-insensitive terms.
/// </summary>
public class CaseInsensitiveWhitespaceAnalyzer : Analyzer
{
    /// <summary>
    /// Builds the token stream for a field: a whitespace tokenizer over
    /// <paramref name="reader"/> wrapped in a lowercasing filter.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed (not used here).</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>The lowercasing filter wrapped around the whitespace tokenizer.</returns>
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        return new LowerCaseFilter(new WhitespaceTokenizer(reader));
    }
}
PS: When you use wildcards (?, *), the query parser does not run the term through any analyzer; it only lowercases the term (depending on the value of QueryParser.LowercaseExpandedTerms).
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With