Solr Buildout Configuration¶
Solr Multi Core¶
solr.cfg:
[solr]
recipe = collective.recipe.solrinstance:mc
cores =
collection1
collection2
collection3
testing
default-core-name = collection1
Stopwords¶
solr.cfg:
[solr]
recipe = collective.recipe.solrinstance
filter =
text solr.StopFilterFactory ignoreCase="true" words="${buildout:directory}/etc/stopwords.txt"
stopwords.txt:
der
die
das
und
oder
Stemming¶
solr.cfg:
[solr]
recipe = collective.recipe.solrinstance
...
filter =
# text solr.GermanMinimalStemFilterFactory # Less aggressive
# text solr.GermanLightStemFilterFactory # Moderately aggressive
# text solr.SnowballPorterFilterFactory language="German2" # More aggressive
text solr.StemmerOverrideFilterFactory dictionary="${buildout:directory}/etc/stemdict.txt" ignoreCase="false"
stemdict.txt:
# english stemming
monkeys monkey
otters otter
# some crazy ones that a stemmer would never do
dogs cat
# german stemming
gelaufen lauf
lief lauf
risiken risiko
Synonyms¶
solr.cfg:
[solr]
recipe = collective.recipe.solrinstance
...
filter-index =
# The recommended approach for dealing with synonyms is to expand the synonym
# when indexing. See: http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory
text solr.SynonymFilterFactory synonyms="${buildout:directory}/etc/synonyms.txt" ignoreCase="true" expand="true"
synonyms.txt:
#Explicit mappings match any token sequence on the LHS of "=>"
#and replace with all alternatives on the RHS. These types of mappings
#ignore the expand parameter in the schema.
#Examples:
i-pod, i pod => ipod,
sea biscuit, sea biscit => seabiscuit
#Equivalent synonyms may be separated with commas and give
#no explicit mapping. In this case the mapping behavior will
#be taken from the expand parameter in the schema. This allows
#the same synonym file to be used in different synonym handling strategies.
#Examples:
ipod, i-pod, i pod
foozball , foosball
universe , cosmos
# If expand==true, "ipod, i-pod, i pod" is equivalent to the explicit mapping:
ipod, i-pod, i pod => ipod, i-pod, i pod
# If expand==false, "ipod, i-pod, i pod" is equivalent to the explicit mapping:
ipod, i-pod, i pod => ipod
#multiple synonym mapping entries are merged.
foo => foo bar
foo => baz
#is equivalent to
foo => foo bar, baz
Autocomplete¶
solr.cfg:
[solr]
recipe = collective.recipe.solrinstance
...
additional-schema-config =
<copyField source="Title" dest="title_autocomplete" />
<copyField source="Description" dest="description_autocomplete" />
<copyField source="Title" dest="title_suggest" />
extra-field-types =
<fieldType class="solr.TextField" name="text_auto">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ShingleFilterFactory" maxShingleSize="4" outputUnigrams="true"/>
<filter class="solr.EdgeNGramFilterFactory" maxGramSize="20" minGramSize="1"/>
</analyzer>
</fieldType>
<fieldType class="solr.TextField" name="text_desc">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ShingleFilterFactory" maxShingleSize="4" outputUnigrams="true"/>
<filter class="solr.EdgeNGramFilterFactory" maxGramSize="20" minGramSize="1"/>
</analyzer>
</fieldType>
# Solr Config => parts/solr/solr/collection1/conf/solrconfig.xml
additional-solrconfig =
<!-- =================================================================== -->
<!-- AUTOCOMPLETE -->
<!-- =================================================================== -->
<requestHandler name="/autocomplete" class="solr.SearchHandler">
<lst name="defaults">
<!-- defType: a reference to the query parser that is used.
The 'edismax' query parser adds features to enhance search relevancy.
https://wiki.apache.org/solr/ExtendedDisMax -->
<str name="defType">edismax</str>
<!-- rows: maximum number of documents included in the response
https://wiki.apache.org/solr/CommonQueryParameters#rows -->
<str name="rows">10</str>
<!-- fl: field list to be returned in the response. -->
<str name="fl">description_autocomplete,title_autocomplete,score</str>
<!-- qf: query fields list with 'boosts' that are associated with each
field.
https://wiki.apache.org/solr/ExtendedDisMax#qf_.28Query_Fields.29
-->
<str name="qf">title_autocomplete^30 description_autocomplete^50.0</str>
<!-- pf: phrase fields list to 'boost' the score (after 'fq' and 'qf')
of documents where terms in 'q' appear in close proximity.
https://wiki.apache.org/solr/ExtendedDisMax#pf_.28Phrase_Fields.29
-->
<str name="pf">title_autocomplete^30 description_autocomplete^50.0</str>
<!-- result grouping:
https://wiki.apache.org/solr/FieldCollapsing#Request_Parameters -->
<str name="group">true</str>
<str name="group.field">title_autocomplete</str>
<str name="group.field">description_autocomplete</str>
<str name="sort">score desc</str>
<str name="group.sort">score desc</str>
</lst>
</requestHandler>
Suggest¶
solr.cfg:
[solr]
recipe = collective.recipe.solrinstance
...
additional-solrconfig =
<!-- =================================================================== -->
<!-- SUGGEST (INCLUDED IN THE DEFAULT SOLR SELECT REQUEST HANDLER) -->
<!-- =================================================================== -->
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
<str name="queryAnalyzerFieldType">title</str>
<lst name="spellchecker">
<str name="name">direct</str>
<str name="field">title_suggest</str>
<str name="classname">solr.DirectSolrSpellChecker</str>
<str name="distanceMeasure">internal</str>
<float name="accuracy">0.2</float>
<int name="maxEdits">2</int>
<int name="minPrefix">1</int>
<int name="maxInspections">5</int>
<int name="minQueryLength">3</int>
<!--<float name="maxQueryFrequency">0.01</float>-->
</lst>
</searchComponent>
<requestHandler name="/select" class="solr.SearchHandler"
startup="lazy">
<lst name="defaults">
<!-- Solr Default Select Request Handler -->
<str name="echoParams">explicit</str>
<int name="rows">500</int>
<!-- Suggest -->
<str name="df">title_suggest</str>
<str name="spellcheck.dictionary">direct</str>
<str name="spellcheck">on</str>
<str name="spellcheck.extendedResults">true</str>
<str name="spellcheck.count">5</str>
<str name="spellcheck.collate">true</str>
<str name="spellcheck.collateExtendedResults">true</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
Solr Import Handler¶
solr.cfg:
[solr]
recipe = collective.recipe.solrinstance:mc
additional-solrconfig =
<!-- Generate a unique key when creating documents in solr -->
<requestHandler name="/update" class="solr.UpdateRequestHandler">
<lst name="defaults">
<str name="update.chain">uuid</str>
</lst>
</requestHandler>
<!-- Generate a unique key when importing documents from csv in solr -->
<requestHandler name="/update/csv" class="solr.UpdateRequestHandler">
<lst name="defaults">
<str name="update.chain">uuid</str>
</lst>
</requestHandler>
<updateRequestProcessorChain name="uuid">
<processor class="solr.UUIDUpdateProcessorFactory">
<str name="fieldName">id</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
[solr-geolocations-import]
recipe = collective.recipe.template
input = inline:
#!/bin/sh
# Delete all data
curl http://${settings:solr-host}:${settings:solr-port}/solr/solr-core-geospatial/update?commit=true -H "Content-Type: text/xml" --data-binary '<delete><query>*:*</query></delete>'
# Import data
curl http://${settings:solr-host}:${settings:solr-port}/solr/solr-core-geospatial/update/csv?commit=true --data-binary @etc/geolocations.csv -H 'Content-type:text/csv; charset=utf-8'
output = ${buildout:directory}/bin/solr-geolocations-import
mode = 755
geolocations.csv:
"location","geolocation"
"01067 Dresden","51.057379, 13.715954"
"01069 Dresden","51.04931, 13.744873"
"01097 Dresden","51.060424, 13.745002"
...
Geospatial Search (with Autocomplete)¶
This only works when querying Solr directly. collective.solr needs some minor fixes. See https://github.com/collective/collective.solr/tree/spatial-filters.
solr.cfg:
[solr-core-geospatial]
max-num-results = 10
unique-key = id
index =
name:id type:uuid indexed:true stored:true multivalued:false required:true
name:location type:text indexed:true stored:true
name:geolocation type:location indexed:true stored:true
name:autocomplete type:text_auto indexed:true stored:true multivalued:true
additionalFieldConfig =
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
extra-field-types =
<fieldType name="uuid" class="solr.UUIDField" indexed="true" />
<fieldType class="solr.TextField" name="text_auto">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ShingleFilterFactory" maxShingleSize="4" outputUnigrams="true"/>
<filter class="solr.EdgeNGramFilterFactory" maxGramSize="20" minGramSize="1"/>
</analyzer>
</fieldType>
# Copy field location -> autocomplete
additional-schema-config =
<copyField source="location" dest="autocomplete" />
additional-solrconfig =
<!-- Generate a unique key when creating documents in solr -->
<requestHandler name="/update" class="solr.UpdateRequestHandler">
<lst name="defaults">
<str name="update.chain">uuid</str>
</lst>
</requestHandler>
<!-- Generate a unique key when importing documents from csv in solr -->
<requestHandler name="/update/csv" class="solr.UpdateRequestHandler">
<lst name="defaults">
<str name="update.chain">uuid</str>
</lst>
</requestHandler>
<updateRequestProcessorChain name="uuid">
<processor class="solr.UUIDUpdateProcessorFactory">
<str name="fieldName">id</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
filter =
text solr.LowerCaseFilterFactory