<!-- Finnish -->

    <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball"

                enablePositionIncrements="true"/>

        <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>

        <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->

      </analyzer>

    </fieldType>


    <!-- French -->

    <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <!-- removes l', etc -->

        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball"

                enablePositionIncrements="true"/>

        <filter class="solr.FrenchLightStemFilterFactory"/>

        <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->

        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->

      </analyzer>

    </fieldType>


    <!-- Irish -->

    <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <!-- removes d', etc -->

        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>

        <!-- removes n-, etc. position increments is intentionally false! -->

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"

                enablePositionIncrements="false"/>

        <filter class="solr.IrishLowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"

                enablePositionIncrements="true"/>

        <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>

      </analyzer>

    </fieldType>


    <!-- Galician -->

    <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt"

                enablePositionIncrements="true"/>

        <filter class="solr.GalicianStemFilterFactory"/>

        <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->

      </analyzer>

    </fieldType>


    <!-- Hindi -->

    <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <!-- normalizes unicode representation -->

        <filter class="solr.IndicNormalizationFilterFactory"/>

        <!-- normalizes variation in spelling -->

        <filter class="solr.HindiNormalizationFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt"

                enablePositionIncrements="true"/>

        <filter class="solr.HindiStemFilterFactory"/>

      </analyzer>

    </fieldType>


    <!-- Hungarian -->

    <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball"

                enablePositionIncrements="true"/>

        <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>

        <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->

      </analyzer>

    </fieldType>


    <!-- Armenian -->

    <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt"

                enablePositionIncrements="true"/>

        <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>

      </analyzer>

    </fieldType>


    <!-- Indonesian -->

    <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt"

                enablePositionIncrements="true"/>

        <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->

        <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>

      </analyzer>

    </fieldType>


    <!-- Italian -->

    <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <!-- removes l', etc -->

        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball"

                enablePositionIncrements="true"/>

        <filter class="solr.ItalianLightStemFilterFactory"/>

        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->

      </analyzer>

    </fieldType>


    <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)


         NOTE: If you want to optimize search for precision, use default operator AND in your query

         parser config with <solrQueryParser defaultOperator="AND"/> further down in this file.  Use

         OR if you would like to optimize for recall (default).

    -->

    <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">

      <analyzer>

        <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)


             Kuromoji has a search mode (default) that does segmentation useful for search.  A heuristic

             is used to segment compounds into its parts and the compound itself is kept as synonym.


             Valid values for attribute mode are:

                normal: regular segmentation

                search: segmentation useful for search with synonyms compounds (default)

              extended: same as search mode, but unigrams unknown words (experimental)


             For some applications it might be good to use search mode for indexing and normal mode for

             queries to reduce recall and prevent parts of compounds from being matched and highlighted.

             Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.


             Kuromoji also has a convenient user dictionary feature that allows overriding the statistical

             model with your own entries for segmentation, part-of-speech tags and readings without a need

             to specify weights.  Notice that user dictionaries have not been subject to extensive testing.


             User dictionary attributes are:

                       userDictionary: user dictionary filename

               userDictionaryEncoding: user dictionary encoding (default is UTF-8)


             See lang/userdict_ja.txt for a sample user dictionary file.


             Punctuation characters are discarded by default.  Use discardPunctuation="false" to keep them.


             See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.

          -->

        <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>

        <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->

        <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->

        <filter class="solr.JapaneseBaseFormFilterFactory"/>

        <!-- Removes tokens with certain part-of-speech tags -->

        <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt"

                enablePositionIncrements="true"/>

        <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->

        <filter class="solr.CJKWidthFilterFactory"/>

        <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt"

                enablePositionIncrements="true"/>

        <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->

        <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>

        <!-- Lower-cases romaji characters -->

        <filter class="solr.LowerCaseFilterFactory"/>

      </analyzer>

    </fieldType>


    <!-- Latvian -->

    <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt"

                enablePositionIncrements="true"/>

        <filter class="solr.LatvianStemFilterFactory"/>

      </analyzer>

    </fieldType>


    <!-- Dutch -->

    <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball"

                enablePositionIncrements="true"/>

        <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>

        <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>

      </analyzer>

    </fieldType>


    <!-- Norwegian -->

    <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball"

                enablePositionIncrements="true"/>

        <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>

        <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> -->

        <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->

      </analyzer>

    </fieldType>


    <!-- Portuguese -->

    <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball"

                enablePositionIncrements="true"/>

        <filter class="solr.PortugueseLightStemFilterFactory"/>

        <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->

        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->

        <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->

      </analyzer>

    </fieldType>


    <!-- Romanian -->

    <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt"

                enablePositionIncrements="true"/>

        <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>

      </analyzer>

    </fieldType>


    <!-- Russian -->

    <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball"

                enablePositionIncrements="true"/>

        <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>

        <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->

      </analyzer>

    </fieldType>


    <!-- Swedish -->

    <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball"

                enablePositionIncrements="true"/>

        <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>

        <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->

      </analyzer>

    </fieldType>


    <!-- Thai -->

    <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.ThaiWordFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt"

                enablePositionIncrements="true"/>

      </analyzer>

    </fieldType>


    <!-- Turkish -->

    <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">

      <analyzer>

        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.TurkishLowerCaseFilterFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt"

                enablePositionIncrements="true"/>

        <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>

      </analyzer>

    </fieldType>


  </types>


</schema>

<!-- solr使用教程四【面试+工作】_analyzer (Solr usage tutorial, part 4 [interview + work]: analyzer) -->