Home | Mirror | Search |
目錄
http://lucene.apache.org/solr/
Java 採用 apt-get 安裝
例 7.1. /etc/profile.d/java.sh
################################################ ### Java environment by neo ################################################ export JAVA_HOME=/usr export JRE_HOME=/usr export PATH=$PATH:/usr/local/apache-tomcat/bin/:/usr/local/jetty-6.1.18/bin export CLASSPATH="./:/usr/share/java/:/usr/local/apache-solr/example/multicore/lib" export JAVA_OPTS="-Xms128m -Xmx1024m"
wget http://apache.freelamp.com/lucene/solr/1.3.0/apache-solr-1.3.0.tgz tar zxvf apache-solr-1.3.0.tgz ln -s apache-solr-1.3.0 ../apache-solr cd ../apache-solr/example/ java -jar start.jar
multicore: java -Dsolr.solr.home=multicore -jar start.jar
http://jetty.mortbay.org/jetty/
http://tomcat.apache.org/
download
cd /usr/local/src wget http://apache.etoak.com/tomcat/tomcat-6/v6.0.20/bin/apache-tomcat-6.0.20.tar.gz wget http://apache.freelamp.com/lucene/solr/1.3.0/apache-solr-1.3.0.tgz tar zxvf apache-tomcat-6.0.20.tar.gz ln -s apache-tomcat-6.0.20 ../apache-tomcat tar zxvf apache-solr-1.3.0.tgz ln -s apache-solr-1.3.0 ../apache-solr
solr.xml
vim /usr/local/apache-tomcat/conf/Catalina/localhost/solr.xml <Context docBase="/usr/local/apache-solr/dist/apache-solr-1.3.0.war" debug="0" crossContext="true" > <Environment name="solr/home" type="java.lang.String" value="/usr/local/apache-solr/example/solr" override="true" /> </Context>
http://code.google.com/p/solr-php-client/
wget http://solr-php-client.googlecode.com/files/SolrPhpClient.2009-03-11.tgz tar zxvf SolrPhpClient.2009-03-11.tgz sudo mv SolrPhpClient/Apache /usr/share/php/
solr.xml
vim /usr/local/apache-solr/example/multicore/solr.xml <?xml version="1.0" encoding="UTF-8" ?> <solr persistent="false"> <cores adminPath="/admin/cores"> <core name="core0" instanceDir="core0" /> <core name="core1" instanceDir="core1" /> <core name="article" instanceDir="article" /> </cores> </solr>
core directory and config file
mkdir -p article/conf vim article/conf/solrconfig.xml <?xml version="1.0" encoding="UTF-8" ?> <config> <updateHandler class="solr.DirectUpdateHandler2" /> <requestDispatcher handleSelect="true" > <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" /> </requestDispatcher> <requestHandler name="standard" class="solr.StandardRequestHandler" default="true" /> <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" /> <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" /> <admin> <defaultQuery>solr</defaultQuery> </admin> </config> vim article/conf/schema.xml <?xml version="1.0" ?> <schema name="example core zero" version="1.1"> <types> <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/> <fieldtype name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/> <fieldType name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/> <fieldType name="text" class="solr.TextField" positionIncrementGap="100" /> </types> <fields> <!-- general --> <field name="id" type="sint" indexed="true" stored="true" multiValued="false" required="true"/> <field name="type" type="string" indexed="true" stored="true" multiValued="false" /> <field name="name" type="string" indexed="true" stored="true" multiValued="false" /> <field name="title" type="string" indexed="true" stored="true" multiValued="false" /> <field name="content" type="text" indexed="true" stored="true" multiValued="false" /> <field name="timestamp" type="date" indexed="true" stored="true" default="NOW"/> </fields> <!-- field to use to determine and enforce document uniqueness. 
--> <uniqueKey>id</uniqueKey> <!-- field for the QueryParser to use when an explicit fieldname is absent --> <defaultSearchField>content</defaultSearchField> <!-- SolrQueryParser configuration: defaultOperator="AND|OR" --> <solrQueryParser defaultOperator="OR"/> <copyField source="title" dest="content"/> <copyField source="name" dest="content"/> </schema>
commit data
vim test.xml <add> <doc> <field name="id">1</field> <field name="name">Hello world</field> </doc> <doc> <field name="id">2</field> <field name="title">Title Hello world</field> </doc> <doc> <field name="id">3</field> <field name="name">Hello world 1</field> <field name="content">Content 1</field> </doc> <doc> <field name="id">4</field> <field name="name">Name Neo</field> </doc> <doc> <field name="id">5</field> <field name="name">Last Chan</field> </doc> </add> java -Durl=http://localhost:8983/solr/article/update -Dcommit=yes -jar ../exampledocs/post.jar test.xml
<fieldType name="text" class="solr.TextField" > <analyzer> <tokenizer class="org.apache.solr.analysis.ChineseTokenizerFactory"/> </analyzer> </fieldType>
<fieldType name="text" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.CJKTokenizerFactory"/> </analyzer> </fieldType>
http://code.google.com/p/mmseg4j/
install
$ cd /usr/local/src/ $ wget http://mmseg4j.googlecode.com/files/mmseg4j-1.7.2.zip $ unzip mmseg4j-1.7.2.zip $ mkdir /usr/local/apache-solr/example/multicore/lib $ cp /usr/local/src/mmseg4j-1.7.2/mmseg4j-all-1.7.2.jar /usr/local/apache-solr/example/multicore/lib $ cd mmseg4j-1.7.2/
test
$ java -Dmmseg.dic.path=/usr/local/apache-solr/example/solr -jar mmseg4j-all-1.7.2.jar 這裡是字元串 $ java -Dmmseg.dic.path=/usr/local/apache-solr/example/solr -cp .:mmseg4j-all-1.7.2.jar com.chenlb.mmseg4j.example.Simple 這裡是字元串 $ java -Dmmseg.dic.path=/usr/local/apache-solr/example/solr -cp .:mmseg4j-all-1.7.2.jar com.chenlb.mmseg4j.example.MaxWord 這裡是字元串
mmseg4j 在 solr 中主要支持兩個參數:mode、dicPath。mode 表示使用哪種模式分詞(有效值:simple、complex、max-word,如果輸入了無效的值,預設使用 max-word)。dicPath 是詞庫目錄,可以是絕對目錄,也可以是相對目錄(相對於 solr.home 目錄,例如 dic 就會在 solr.home/dic 目錄下找詞庫檔案),如果不指定,預設在 CWD/data 目錄(程序運行當前目錄的 data 子目錄)下找。
分詞例子
<fieldtype name="textComplex" class="solr.TextField"> <analyzer> <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory" mode="complex" dicPath="dic"> </tokenizer> </analyzer> </fieldtype> <fieldtype name="textMaxWord" class="solr.TextField"> <analyzer> <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory" mode="max-word" dicPath="dic"> </tokenizer> </analyzer> </fieldtype> <fieldtype name="textSimple" class="solr.TextField"> <analyzer> <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory" mode="simple" dicPath="/usr/local/apache-solr/example/solr/my_dic"> </tokenizer> </analyzer> </fieldtype>
添加到schema.xml
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" > <analyzer> <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory" mode="complex" dicPath="dic"/> <filter class="solr.LowerCaseFilterFactory"/> </analyzer> </fieldType>
http://localhost:8080/solr/admin/analysis.jsp 在 Field 的下拉菜單選擇 name,然後在應用輸入 complex。可以看到 mmseg4j 的分詞結果。
$ cd /usr/local/src/ $ mkdir paoding-analysis-2.0.4-beta $ cd paoding-analysis-2.0.4-beta/ $ wget http://paoding.googlecode.com/files/paoding-analysis-2.0.4-beta.zip $ unzip paoding-analysis-2.0.4-beta.zip $ cp paoding-analysis.jar /usr/local/apache-solr/example/multicore/lib/
ChineseTokenizerFactory