OracleText
Jump to navigation
Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
This article will describe, in short, what I have done to speed up queries which searches inside email bodies/attachments and RTFM articles which contain large amounts of WikiText.
There are 3 things that need to be done:
- patch SearchBuilder, to remove pre/post pend of %, change LIKE to contain(Content,'searchstring')>0
- change RTFM Search.html, remove pre/post pend of % on WikiText Customfields
- add Oracle Text indexes for Attachments.content and ObjectCustomFieldValues.largecontent
Attached are the file needed todo this, that functionality isn't there anymore ;-( So here they come inline
Start Searchbuilder patch no. 1
--- SearchBuilder.pm.orig 2007-07-07 22:45:00.000000000 +0200 +++ SearchBuilder.pm 2007-07-10 08:49:00.000000000 +0200 @@ -767,9 +767,15 @@ if ( $args{'FIELD'} ) { - #If it's a like, we supply the %s around the search term + #If it's a like, we supply the %s around the search term only if its not Oracle + #because for Oracle we'll use where contains(content,'text')>1 if ( $args{'OPERATOR'} =~ /LIKE/i ) { - $args{'VALUE'} = "%" . $args{'VALUE'} . "%"; + if ( $RT::DatabaseType eq 'Oracle') { + $args{'VALUE'} = $args{'VALUE'} + } + else { + $args{'VALUE'} = "%" . $args{'VALUE'} . "%"; + } } elsif ( $args{'OPERATOR'} =~ /STARTSWITH/i ) { $args{'VALUE'} = $args{'VALUE'} . "%"; @@ -932,6 +938,49 @@ value => $args{'VALUE'}, }; +# Keep the original clause and modify if dbtype is Oracle +# This patch makes searching for ticket content real fast when you have the appropriate +# index on Attachments.Content and ObjectCustomFieldValues.Largecontent for RTFM +# CREATE INDEX CNT ON ATTACHMENTS (CONTENT) INDEXTYPE IS CTXSYS.CONTEXT PARAMETERS('memory 4M'); +# CREATE INDEX FM_LARGE_CNT ON ObjectCustomFieldValues(Largecontent) INDEXTYPE IS CTXSYS.CONTEXT PARAMETERS('memory 4M'); +# parameters is tunable, see Oracle docs ;-) +# fixed the static replace Attachments_2.Content by dynamic version for both RT and RTFM Content fields +# also changed RTFM/Article/Search.html NOT to pre and post pend '%' +# other things needed is a FMT column for Attachments and ObjectCustomFieldValues +# which takes care of not including base64 text into the index +# does this by using a trigger which fills the FMT column and using the appropriate +# options when creating the Text index. (saved 25% of unneeded index room ~1Gb out of ~4Gb) + if ( $RT::DatabaseType eq 'Oracle' ) { + my ($tmpfield); + # First fix RT to use OracleText when searching Ticket Content + if ($QualifiedField =~ /lower\(Attachments_\d+\.Content\)/ and $args{'OPERATOR'} eq 'LIKE') { + # get rid of the lower since you can't have a functional context index. + # don't care about the remaining (), they don't have a significant effect on performance + # you can however tell Context to index casesensitive or caseinsensitive. + # this is probably not very elegant, excuse my perl ;-) + $tmpfield = $QualifiedField; + $tmpfield =~ s/(lower)?//g; + $clause = '(contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)'; + } + elsif ($QualifiedField =~ /lower\(Attachments_\d+\.Content\)/ and $args{'OPERATOR'} eq 'NOT LIKE') { + $tmpfield = $QualifiedField; + $tmpfield =~ s/(lower)?//g; + $clause = '(not contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)'; + } + # Second fix RTFM to use OracleText when searching Article Content and LargeContent + elsif ($QualifiedField =~ /ObjectCustomFieldValues_\d+\.Largecontent/ and $args{'OPERATOR'} eq 'LIKE') { + $tmpfield = $QualifiedField; + $tmpfield =~ s/(lower)?//g; + $clause = '(contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)'; + } + elsif ($QualifiedField =~ /ObjectCustomFieldValues_\d+\.Largecontent/ and $args{'OPERATOR'} eq 'NOT LIKE') { + $tmpfield = $QualifiedField; + $tmpfield =~ s/(lower)?//g; + $clause = '(not contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)'; + } + + } + # Juju because this should come _AFTER_ the EA my @prefix; if ( $self->{_open_parens}{ $ClauseId } ) {
End Searchbuilder patch no. 1
Start Searchbuilder patch no. 2
--- Handle.pm.orig 2007-04-10 22:48:42.000000000 +0200 +++ Handle.pm 2007-07-12 11:13:00.000000000 +0200 @@ -1101,6 +1101,14 @@ # of here as we can't parse subclauses return 1 if grep $_ ne 'generic_restrictions', keys %{ $args{'SearchBuilder'}->{'subclauses'} }; +# JvdW 2007-07-12 +# Get out of this function if were dealing with either Attachments or ObjectCustomFieldValues +# since these are modified and parser trips over '(contains(content,'test')>0) +# haven't seen any side effect but that doesn't mean there aren't any. +# feel free to fix this lousy perl of mine ;-) + return 1 if $args{'ALIAS'} =~ /Attachments_\d+/ ; + return 1 if $args{'ALIAS'} =~ /ObjectCustomFieldValues_\d+/ ; + # build full list of generic conditions my @conditions; foreach ( grep @$_, values %{ $args{'SearchBuilder'}->{'restrictions'} } ) {
End Searchbuilder patch no. 2
Start OracleTest part
/* Direct_datastore is for text in 1 column further no attributes */ begin ctx_ddl.drop_preference('"CNT_DST"'); end; begin ctx_ddl.create_preference('"CNT_DST"','DIRECT_DATASTORE'); end; / /* NULL_FILTER because we only want text indexing or auto_filter because*/ /* we add an extra fmt column to Attachments which holds a flag whether */ /* that row is indexed or not */ begin ctx_ddl.drop_preference('"CNT_FIL"'); end; / begin ctx_ddl.create_preference('"CNT_FIL"','AUTO_FILTER'); end; / begin ctx_ddl.drop_section_group('"CNT_SGP"'); end; / begin ctx_ddl.create_section_group('"CNT_SGP"','NULL_SECTION_GROUP'); end; / /* BASIC_LEXER is sufficient because we only index English/Dutch */ /* Its wise to set some attributes, look at Oracle Docs for which */ begin ctx_ddl.drop_preference('"CNT_LEX"'); end; / begin ctx_ddl.create_preference('"CNT_LEX"','BASIC_LEXER'); ctx_ddl.set_attribute('CNT_LEX', 'composite', 'DUTCH'); ctx_ddl.set_attribute('CNT_LEX', 'index_themes', 'NO'); ctx_ddl.set_attribute('CNT_LEX', 'index_stems', 'DUTCH'); ctx_ddl.set_attribute('CNT_LEX', 'index_text', 'YES'); ctx_ddl.set_attribute('CNT_LEX', 'printjoins', '_-.'); end; / /* BASIC_WORDLIST to set properties of stop words */ /* in my case I have added Dutch and English, edit to needs */ begin ctx_ddl.drop_preference('"CNT_WDL"'); end; / begin ctx_ddl.create_preference('"CNT_WDL"','BASIC_WORDLIST'); ctx_ddl.set_attribute('"CNT_WDL"','STEMMER','DUTCH'); ctx_ddl.set_attribute('"CNT_WDL"','FUZZY_MATCH','DUTCH'); end; / begin ctx_ddl.drop_stoplist('"CNT_SPL"'); end; / begin ctx_ddl.create_stoplist('"CNT_SPL"','BASIC_STOPLIST'); ctx_ddl.add_stopword('"CNT_SPL"','Corp'); ctx_ddl.add_stopword('"CNT_SPL"','Mr'); ctx_ddl.add_stopword('"CNT_SPL"','Mrs'); ctx_ddl.add_stopword('"CNT_SPL"','Ms'); ctx_ddl.add_stopword('"CNT_SPL"','Mz'); ctx_ddl.add_stopword('"CNT_SPL"','a'); ctx_ddl.add_stopword('"CNT_SPL"','about'); ctx_ddl.add_stopword('"CNT_SPL"','after'); ctx_ddl.add_stopword('"CNT_SPL"','all'); ctx_ddl.add_stopword('"CNT_SPL"','also'); ctx_ddl.add_stopword('"CNT_SPL"','an'); ctx_ddl.add_stopword('"CNT_SPL"','and'); ctx_ddl.add_stopword('"CNT_SPL"','any'); ctx_ddl.add_stopword('"CNT_SPL"','are'); ctx_ddl.add_stopword('"CNT_SPL"','as'); ctx_ddl.add_stopword('"CNT_SPL"','at'); ctx_ddl.add_stopword('"CNT_SPL"','be'); ctx_ddl.add_stopword('"CNT_SPL"','because'); ctx_ddl.add_stopword('"CNT_SPL"','been'); ctx_ddl.add_stopword('"CNT_SPL"','but'); ctx_ddl.add_stopword('"CNT_SPL"','by'); ctx_ddl.add_stopword('"CNT_SPL"','can'); ctx_ddl.add_stopword('"CNT_SPL"','co'); ctx_ddl.add_stopword('"CNT_SPL"','could'); ctx_ddl.add_stopword('"CNT_SPL"','for'); ctx_ddl.add_stopword('"CNT_SPL"','from'); ctx_ddl.add_stopword('"CNT_SPL"','had'); ctx_ddl.add_stopword('"CNT_SPL"','has'); ctx_ddl.add_stopword('"CNT_SPL"','have'); ctx_ddl.add_stopword('"CNT_SPL"','he'); ctx_ddl.add_stopword('"CNT_SPL"','her'); ctx_ddl.add_stopword('"CNT_SPL"','his'); ctx_ddl.add_stopword('"CNT_SPL"','if'); ctx_ddl.add_stopword('"CNT_SPL"','in'); ctx_ddl.add_stopword('"CNT_SPL"','inc'); ctx_ddl.add_stopword('"CNT_SPL"','into'); ctx_ddl.add_stopword('"CNT_SPL"','is'); ctx_ddl.add_stopword('"CNT_SPL"','it'); ctx_ddl.add_stopword('"CNT_SPL"','its'); ctx_ddl.add_stopword('"CNT_SPL"','last'); ctx_ddl.add_stopword('"CNT_SPL"','more'); ctx_ddl.add_stopword('"CNT_SPL"','most'); ctx_ddl.add_stopword('"CNT_SPL"','no'); ctx_ddl.add_stopword('"CNT_SPL"','not'); ctx_ddl.add_stopword('"CNT_SPL"','of'); ctx_ddl.add_stopword('"CNT_SPL"','on'); ctx_ddl.add_stopword('"CNT_SPL"','one'); ctx_ddl.add_stopword('"CNT_SPL"','only'); ctx_ddl.add_stopword('"CNT_SPL"','or'); ctx_ddl.add_stopword('"CNT_SPL"','other'); ctx_ddl.add_stopword('"CNT_SPL"','out'); ctx_ddl.add_stopword('"CNT_SPL"','over'); ctx_ddl.add_stopword('"CNT_SPL"','s'); ctx_ddl.add_stopword('"CNT_SPL"','says'); ctx_ddl.add_stopword('"CNT_SPL"','she'); ctx_ddl.add_stopword('"CNT_SPL"','so'); ctx_ddl.add_stopword('"CNT_SPL"','some'); ctx_ddl.add_stopword('"CNT_SPL"','such'); ctx_ddl.add_stopword('"CNT_SPL"','than'); ctx_ddl.add_stopword('"CNT_SPL"','that'); ctx_ddl.add_stopword('"CNT_SPL"','the'); ctx_ddl.add_stopword('"CNT_SPL"','their'); ctx_ddl.add_stopword('"CNT_SPL"','there'); ctx_ddl.add_stopword('"CNT_SPL"','they'); ctx_ddl.add_stopword('"CNT_SPL"','this'); ctx_ddl.add_stopword('"CNT_SPL"','to'); ctx_ddl.add_stopword('"CNT_SPL"','up'); ctx_ddl.add_stopword('"CNT_SPL"','was'); ctx_ddl.add_stopword('"CNT_SPL"','we'); ctx_ddl.add_stopword('"CNT_SPL"','were'); ctx_ddl.add_stopword('"CNT_SPL"','when'); ctx_ddl.add_stopword('"CNT_SPL"','which'); ctx_ddl.add_stopword('"CNT_SPL"','who'); ctx_ddl.add_stopword('"CNT_SPL"','will'); ctx_ddl.add_stopword('"CNT_SPL"','with'); ctx_ddl.add_stopword('"CNT_SPL"','would'); ctx_ddl.add_stopword('"CNT_SPL"','aan'); ctx_ddl.add_stopword('"CNT_SPL"','aangaande'); ctx_ddl.add_stopword('"CNT_SPL"','aangezien'); ctx_ddl.add_stopword('"CNT_SPL"','achter'); ctx_ddl.add_stopword('"CNT_SPL"','achterna'); ctx_ddl.add_stopword('"CNT_SPL"','afgelopen'); ctx_ddl.add_stopword('"CNT_SPL"','al'); ctx_ddl.add_stopword('"CNT_SPL"','aldaar'); ctx_ddl.add_stopword('"CNT_SPL"','aldus'); ctx_ddl.add_stopword('"CNT_SPL"','alhoewel'); ctx_ddl.add_stopword('"CNT_SPL"','alias'); ctx_ddl.add_stopword('"CNT_SPL"','alle'); ctx_ddl.add_stopword('"CNT_SPL"','allebei'); ctx_ddl.add_stopword('"CNT_SPL"','alleen'); ctx_ddl.add_stopword('"CNT_SPL"','alsnog'); ctx_ddl.add_stopword('"CNT_SPL"','altijd'); ctx_ddl.add_stopword('"CNT_SPL"','altoos'); ctx_ddl.add_stopword('"CNT_SPL"','ander'); ctx_ddl.add_stopword('"CNT_SPL"','andere'); ctx_ddl.add_stopword('"CNT_SPL"','anders'); ctx_ddl.add_stopword('"CNT_SPL"','anderszins'); ctx_ddl.add_stopword('"CNT_SPL"','behalve'); ctx_ddl.add_stopword('"CNT_SPL"','behoudens'); ctx_ddl.add_stopword('"CNT_SPL"','beide'); ctx_ddl.add_stopword('"CNT_SPL"','beiden'); ctx_ddl.add_stopword('"CNT_SPL"','ben'); ctx_ddl.add_stopword('"CNT_SPL"','beneden'); ctx_ddl.add_stopword('"CNT_SPL"','bent'); ctx_ddl.add_stopword('"CNT_SPL"','bepaald'); ctx_ddl.add_stopword('"CNT_SPL"','betreffende'); ctx_ddl.add_stopword('"CNT_SPL"','bij'); ctx_ddl.add_stopword('"CNT_SPL"','binnen'); ctx_ddl.add_stopword('"CNT_SPL"','binnenin'); ctx_ddl.add_stopword('"CNT_SPL"','boven'); ctx_ddl.add_stopword('"CNT_SPL"','bovenal'); ctx_ddl.add_stopword('"CNT_SPL"','bovendien'); ctx_ddl.add_stopword('"CNT_SPL"','bovengenoemd'); ctx_ddl.add_stopword('"CNT_SPL"','bovenstaand'); ctx_ddl.add_stopword('"CNT_SPL"','bovenvermeld'); ctx_ddl.add_stopword('"CNT_SPL"','buiten'); ctx_ddl.add_stopword('"CNT_SPL"','daar'); ctx_ddl.add_stopword('"CNT_SPL"','daarheen'); ctx_ddl.add_stopword('"CNT_SPL"','daarin'); ctx_ddl.add_stopword('"CNT_SPL"','daarna'); ctx_ddl.add_stopword('"CNT_SPL"','daarnet'); ctx_ddl.add_stopword('"CNT_SPL"','daarom'); ctx_ddl.add_stopword('"CNT_SPL"','daarop'); ctx_ddl.add_stopword('"CNT_SPL"','daarvanlangs'); ctx_ddl.add_stopword('"CNT_SPL"','dan'); ctx_ddl.add_stopword('"CNT_SPL"','dat'); ctx_ddl.add_stopword('"CNT_SPL"','de'); ctx_ddl.add_stopword('"CNT_SPL"','die'); ctx_ddl.add_stopword('"CNT_SPL"','dikwijls'); ctx_ddl.add_stopword('"CNT_SPL"','dit'); ctx_ddl.add_stopword('"CNT_SPL"','door'); ctx_ddl.add_stopword('"CNT_SPL"','doorgaand'); ctx_ddl.add_stopword('"CNT_SPL"','dus'); ctx_ddl.add_stopword('"CNT_SPL"','echter'); ctx_ddl.add_stopword('"CNT_SPL"','eer'); ctx_ddl.add_stopword('"CNT_SPL"','eerdat'); ctx_ddl.add_stopword('"CNT_SPL"','eerder'); ctx_ddl.add_stopword('"CNT_SPL"','eerlang'); ctx_ddl.add_stopword('"CNT_SPL"','eerst'); ctx_ddl.add_stopword('"CNT_SPL"','elk'); ctx_ddl.add_stopword('"CNT_SPL"','elke'); ctx_ddl.add_stopword('"CNT_SPL"','en'); ctx_ddl.add_stopword('"CNT_SPL"','enig'); ctx_ddl.add_stopword('"CNT_SPL"','enigszins'); ctx_ddl.add_stopword('"CNT_SPL"','enkel'); ctx_ddl.add_stopword('"CNT_SPL"','er'); ctx_ddl.add_stopword('"CNT_SPL"','erdoor'); ctx_ddl.add_stopword('"CNT_SPL"','even'); ctx_ddl.add_stopword('"CNT_SPL"','eveneens'); ctx_ddl.add_stopword('"CNT_SPL"','evenwel'); ctx_ddl.add_stopword('"CNT_SPL"','gauw'); ctx_ddl.add_stopword('"CNT_SPL"','gedurende'); ctx_ddl.add_stopword('"CNT_SPL"','geen'); ctx_ddl.add_stopword('"CNT_SPL"','gehad'); ctx_ddl.add_stopword('"CNT_SPL"','gekund'); ctx_ddl.add_stopword('"CNT_SPL"','geleden'); ctx_ddl.add_stopword('"CNT_SPL"','gelijk'); ctx_ddl.add_stopword('"CNT_SPL"','gemoeten'); ctx_ddl.add_stopword('"CNT_SPL"','gemogen'); ctx_ddl.add_stopword('"CNT_SPL"','geweest'); ctx_ddl.add_stopword('"CNT_SPL"','gewoon'); ctx_ddl.add_stopword('"CNT_SPL"','gewoonweg'); ctx_ddl.add_stopword('"CNT_SPL"','haar'); ctx_ddl.add_stopword('"CNT_SPL"','hadden'); ctx_ddl.add_stopword('"CNT_SPL"','hare'); ctx_ddl.add_stopword('"CNT_SPL"','heb'); ctx_ddl.add_stopword('"CNT_SPL"','hebben'); ctx_ddl.add_stopword('"CNT_SPL"','hebt'); ctx_ddl.add_stopword('"CNT_SPL"','heeft'); ctx_ddl.add_stopword('"CNT_SPL"','hem'); ctx_ddl.add_stopword('"CNT_SPL"','hen'); ctx_ddl.add_stopword('"CNT_SPL"','het'); ctx_ddl.add_stopword('"CNT_SPL"','hierbeneden'); ctx_ddl.add_stopword('"CNT_SPL"','hierboven'); ctx_ddl.add_stopword('"CNT_SPL"','hij'); ctx_ddl.add_stopword('"CNT_SPL"','hoe'); ctx_ddl.add_stopword('"CNT_SPL"','hoewel'); ctx_ddl.add_stopword('"CNT_SPL"','hun'); ctx_ddl.add_stopword('"CNT_SPL"','hunne'); ctx_ddl.add_stopword('"CNT_SPL"','ik'); ctx_ddl.add_stopword('"CNT_SPL"','ikzelf'); /* ctx_ddl.add_stopword('"CNT_SPL"','in');*/ ctx_ddl.add_stopword('"CNT_SPL"','inmiddels'); ctx_ddl.add_stopword('"CNT_SPL"','inzake'); /* ctx_ddl.add_stopword('"CNT_SPL"','is');*/ ctx_ddl.add_stopword('"CNT_SPL"','jezelf'); ctx_ddl.add_stopword('"CNT_SPL"','jij'); ctx_ddl.add_stopword('"CNT_SPL"','jijzelf'); ctx_ddl.add_stopword('"CNT_SPL"','jou'); ctx_ddl.add_stopword('"CNT_SPL"','jouw'); ctx_ddl.add_stopword('"CNT_SPL"','jouwe'); ctx_ddl.add_stopword('"CNT_SPL"','juist'); ctx_ddl.add_stopword('"CNT_SPL"','jullie'); ctx_ddl.add_stopword('"CNT_SPL"','kan'); ctx_ddl.add_stopword('"CNT_SPL"','klaar'); ctx_ddl.add_stopword('"CNT_SPL"','kon'); ctx_ddl.add_stopword('"CNT_SPL"','konden'); ctx_ddl.add_stopword('"CNT_SPL"','krachtens'); ctx_ddl.add_stopword('"CNT_SPL"','kunnen'); ctx_ddl.add_stopword('"CNT_SPL"','kunt'); ctx_ddl.add_stopword('"CNT_SPL"','later'); ctx_ddl.add_stopword('"CNT_SPL"','liever'); ctx_ddl.add_stopword('"CNT_SPL"','maar'); ctx_ddl.add_stopword('"CNT_SPL"','mag'); ctx_ddl.add_stopword('"CNT_SPL"','meer'); ctx_ddl.add_stopword('"CNT_SPL"','met'); ctx_ddl.add_stopword('"CNT_SPL"','mezelf'); ctx_ddl.add_stopword('"CNT_SPL"','mij'); ctx_ddl.add_stopword('"CNT_SPL"','mijn'); ctx_ddl.add_stopword('"CNT_SPL"','mijnent'); ctx_ddl.add_stopword('"CNT_SPL"','mijner'); ctx_ddl.add_stopword('"CNT_SPL"','mijzelf'); ctx_ddl.add_stopword('"CNT_SPL"','misschien'); ctx_ddl.add_stopword('"CNT_SPL"','mocht'); ctx_ddl.add_stopword('"CNT_SPL"','mochten'); ctx_ddl.add_stopword('"CNT_SPL"','moest'); ctx_ddl.add_stopword('"CNT_SPL"','moesten'); ctx_ddl.add_stopword('"CNT_SPL"','moet'); ctx_ddl.add_stopword('"CNT_SPL"','moeten'); ctx_ddl.add_stopword('"CNT_SPL"','mogen'); ctx_ddl.add_stopword('"CNT_SPL"','na'); ctx_ddl.add_stopword('"CNT_SPL"','naar'); ctx_ddl.add_stopword('"CNT_SPL"','nadat'); ctx_ddl.add_stopword('"CNT_SPL"','net'); ctx_ddl.add_stopword('"CNT_SPL"','niet'); ctx_ddl.add_stopword('"CNT_SPL"','noch'); ctx_ddl.add_stopword('"CNT_SPL"','nog'); ctx_ddl.add_stopword('"CNT_SPL"','nogal'); ctx_ddl.add_stopword('"CNT_SPL"','nu'); /* ctx_ddl.add_stopword('"CNT_SPL"','of');*/ ctx_ddl.add_stopword('"CNT_SPL"','ofschoon'); ctx_ddl.add_stopword('"CNT_SPL"','om'); ctx_ddl.add_stopword('"CNT_SPL"','omdat'); ctx_ddl.add_stopword('"CNT_SPL"','omhoog'); ctx_ddl.add_stopword('"CNT_SPL"','omlaag'); ctx_ddl.add_stopword('"CNT_SPL"','omstreeks'); ctx_ddl.add_stopword('"CNT_SPL"','omtrent'); ctx_ddl.add_stopword('"CNT_SPL"','omver'); ctx_ddl.add_stopword('"CNT_SPL"','onder'); ctx_ddl.add_stopword('"CNT_SPL"','ondertussen'); ctx_ddl.add_stopword('"CNT_SPL"','ongeveer'); ctx_ddl.add_stopword('"CNT_SPL"','ons'); ctx_ddl.add_stopword('"CNT_SPL"','onszelf'); ctx_ddl.add_stopword('"CNT_SPL"','onze'); ctx_ddl.add_stopword('"CNT_SPL"','ook'); ctx_ddl.add_stopword('"CNT_SPL"','op'); ctx_ddl.add_stopword('"CNT_SPL"','opnieuw'); ctx_ddl.add_stopword('"CNT_SPL"','opzij'); /* ctx_ddl.add_stopword('"CNT_SPL"','over');*/ ctx_ddl.add_stopword('"CNT_SPL"','overeind'); ctx_ddl.add_stopword('"CNT_SPL"','overigens'); ctx_ddl.add_stopword('"CNT_SPL"','pas'); ctx_ddl.add_stopword('"CNT_SPL"','precies'); ctx_ddl.add_stopword('"CNT_SPL"','reeds'); ctx_ddl.add_stopword('"CNT_SPL"','rond'); ctx_ddl.add_stopword('"CNT_SPL"','rondom'); ctx_ddl.add_stopword('"CNT_SPL"','sedert'); ctx_ddl.add_stopword('"CNT_SPL"','sinds'); ctx_ddl.add_stopword('"CNT_SPL"','sindsdien'); ctx_ddl.add_stopword('"CNT_SPL"','slechts'); ctx_ddl.add_stopword('"CNT_SPL"','sommige'); ctx_ddl.add_stopword('"CNT_SPL"','spoedig'); ctx_ddl.add_stopword('"CNT_SPL"','steeds'); ctx_ddl.add_stopword('"CNT_SPL"','tamelijk'); ctx_ddl.add_stopword('"CNT_SPL"','tenzij'); ctx_ddl.add_stopword('"CNT_SPL"','terwijl'); ctx_ddl.add_stopword('"CNT_SPL"','thans'); ctx_ddl.add_stopword('"CNT_SPL"','tijdens'); ctx_ddl.add_stopword('"CNT_SPL"','toch'); ctx_ddl.add_stopword('"CNT_SPL"','toen'); ctx_ddl.add_stopword('"CNT_SPL"','toenmaals'); ctx_ddl.add_stopword('"CNT_SPL"','toenmalig'); ctx_ddl.add_stopword('"CNT_SPL"','tot'); ctx_ddl.add_stopword('"CNT_SPL"','totdat'); ctx_ddl.add_stopword('"CNT_SPL"','tussen'); ctx_ddl.add_stopword('"CNT_SPL"','uit'); ctx_ddl.add_stopword('"CNT_SPL"','uitgezonderd'); ctx_ddl.add_stopword('"CNT_SPL"','vaak'); ctx_ddl.add_stopword('"CNT_SPL"','van'); ctx_ddl.add_stopword('"CNT_SPL"','vandaan'); ctx_ddl.add_stopword('"CNT_SPL"','vanuit'); ctx_ddl.add_stopword('"CNT_SPL"','vanwege'); ctx_ddl.add_stopword('"CNT_SPL"','veeleer'); ctx_ddl.add_stopword('"CNT_SPL"','verder'); ctx_ddl.add_stopword('"CNT_SPL"','vervolgens'); ctx_ddl.add_stopword('"CNT_SPL"','vol'); ctx_ddl.add_stopword('"CNT_SPL"','volgens'); ctx_ddl.add_stopword('"CNT_SPL"','voor'); ctx_ddl.add_stopword('"CNT_SPL"','vooraf'); ctx_ddl.add_stopword('"CNT_SPL"','vooral'); ctx_ddl.add_stopword('"CNT_SPL"','vooralsnog'); ctx_ddl.add_stopword('"CNT_SPL"','voorbij'); ctx_ddl.add_stopword('"CNT_SPL"','voordat'); ctx_ddl.add_stopword('"CNT_SPL"','voordezen'); ctx_ddl.add_stopword('"CNT_SPL"','voordien'); ctx_ddl.add_stopword('"CNT_SPL"','voorheen'); ctx_ddl.add_stopword('"CNT_SPL"','voorop'); ctx_ddl.add_stopword('"CNT_SPL"','vooruit'); ctx_ddl.add_stopword('"CNT_SPL"','vrij'); ctx_ddl.add_stopword('"CNT_SPL"','vroeg'); ctx_ddl.add_stopword('"CNT_SPL"','waar'); ctx_ddl.add_stopword('"CNT_SPL"','waarom'); ctx_ddl.add_stopword('"CNT_SPL"','wanneer'); ctx_ddl.add_stopword('"CNT_SPL"','want'); ctx_ddl.add_stopword('"CNT_SPL"','waren'); /* ctx_ddl.add_stopword('"CNT_SPL"','was');*/ ctx_ddl.add_stopword('"CNT_SPL"','wat'); ctx_ddl.add_stopword('"CNT_SPL"','weer'); ctx_ddl.add_stopword('"CNT_SPL"','weg'); ctx_ddl.add_stopword('"CNT_SPL"','wegens'); ctx_ddl.add_stopword('"CNT_SPL"','wel'); ctx_ddl.add_stopword('"CNT_SPL"','weldra'); ctx_ddl.add_stopword('"CNT_SPL"','welk'); ctx_ddl.add_stopword('"CNT_SPL"','welke'); ctx_ddl.add_stopword('"CNT_SPL"','wie'); ctx_ddl.add_stopword('"CNT_SPL"','wiens'); ctx_ddl.add_stopword('"CNT_SPL"','wier'); ctx_ddl.add_stopword('"CNT_SPL"','wij'); ctx_ddl.add_stopword('"CNT_SPL"','wijzelf'); ctx_ddl.add_stopword('"CNT_SPL"','zal'); ctx_ddl.add_stopword('"CNT_SPL"','ze'); ctx_ddl.add_stopword('"CNT_SPL"','zelfs'); ctx_ddl.add_stopword('"CNT_SPL"','zichzelf'); ctx_ddl.add_stopword('"CNT_SPL"','zij'); ctx_ddl.add_stopword('"CNT_SPL"','zijn'); ctx_ddl.add_stopword('"CNT_SPL"','zijne'); ctx_ddl.add_stopword('"CNT_SPL"','zo'); ctx_ddl.add_stopword('"CNT_SPL"','zodra'); ctx_ddl.add_stopword('"CNT_SPL"','zonder'); ctx_ddl.add_stopword('"CNT_SPL"','zou'); ctx_ddl.add_stopword('"CNT_SPL"','zouden'); ctx_ddl.add_stopword('"CNT_SPL"','zowat'); ctx_ddl.add_stopword('"CNT_SPL"','zulke'); ctx_ddl.add_stopword('"CNT_SPL"','zullen'); ctx_ddl.add_stopword('"CNT_SPL"','zult'); end; / begin ctx_ddl.drop_preference('"CNT_STO"'); end; / begin ctx_ddl.create_preference('"CNT_STO"','BASIC_STORAGE'); ctx_ddl.set_attribute('"CNT_STO"','R_TABLE_CLAUSE','lob (data) store as (cache )'); ctx_ddl.set_attribute('"CNT_STO"','I_INDEX_CLAUSE','compress 2'); end; / /* Debug/progress check, will log in $ORACLE_HOME/ctx/log */ begin ctx_output.start_log('CNT_LOG'); end; / /* If this is a production database, fill in the added column */ /* with the correct info to rebuild the index later */ update attachments set fmt='ignore' where contentencoding is NULL update attachments set fmt='ignore' where contentencoding = 'base64' update attachments set fmt='text' where contentencoding = 'none' update attachments set fmt='text' where contentencoding = 'quoted-printable' /* Extent the Attachments table with an extra column */ /* This needs te be done only once */ ALTER TABLE RT_USER.ATTACHMENTS ADD (context_fmt VARCHAR2(10)); drop index cnt; create index "RT_USER"."CNT" on "RT_USER"."ATTACHMENTS" ("CONTENT") indextype is ctxsys.context parameters(' format column context_fmt datastore "CNT_DST" filter "CNT_FIL" section group "CNT_SGP" lexer "CNT_LEX" wordlist "CNT_WDL" stoplist "CNT_SPL" storage "CNT_STO" ') / begin ctx_output.end_log; end; / /* Add a trigger so that new rows are properly tagged */ CREATE OR REPLACE TRIGGER RT_USER.BI_Att BEFORE INSERT ON RT_USER.ATTACHMENTS REFERENCING NEW AS New OLD AS Old FOR EACH ROW DECLARE /****************************************************************************** NAME: PURPOSE: REVISIONS: Ver Date Author Description --------- ---------- --------------- ------------------------------------ 1.0 24-5-2007 Joop v/d Wege 1. Created this trigger. NOTES: Trigger takes care of filling the context_fmt column with the right value so that only text is really indexed and not base64 rows ******************************************************************************/ BEGIN IF :new.CONTENTENCODING = 'base64' OR :new.CONTENTENCODING is NULL THEN :new.context_fmt := 'ignore'; ELSE :new.context_fmt := 'text'; END IF; END ; / /* There needs to be a job that on a regular basis refreshes the index */ BEGIN SYS.DBMS_JOB.REMOVE(21); COMMIT; END; / DECLARE X NUMBER; BEGIN SYS.DBMS_JOB.SUBMIT ( job => X ,what => 'DECLARE stmt VARCHAR2(200); BEGIN stmt := ''ALTER INDEX cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')''; EXECUTE IMMEDIATE stmt; END; ' ,next_date => to_date('20-07-2007 06:00:00','dd/mm/yyyy hh24:mi:ss') ,interval => 'TRUNC(SYSDATE+1)+6/24' ,no_parse => FALSE ); SYS.DBMS_OUTPUT.PUT_LINE('Job Number is: ' || to_char(x)); COMMIT; END; / /* test if things work */ /* first run is slow, second should be fast <500ms) */ select * from attachments where contains(content,'accounts')>0; /****************************************************************************/ /* Repeat for ObjectCustomFieldValues */ /****************************************************************************/ alter table objectcustomfieldvalues add (context_fmt varchar2(10)); update objectcustomfieldvalues set context_fmt='ignore' where contentencoding is NULL; update objectcustomfieldvalues set context_fmt='ignore' where contentencoding = 'base64'; update objectcustomfieldvalues set context_fmt='text' where contentencoding = 'none'; update objectcustomfieldvalues set context_fmt='text' where contentencoding = 'quoted-printable'; CREATE OR REPLACE TRIGGER RT_USER.BI_OCFV BEFORE INSERT ON RT_USER.objectcustomfieldvalues REFERENCING NEW AS New OLD AS Old FOR EACH ROW DECLARE /****************************************************************************** NAME: PURPOSE: REVISIONS: Ver Date Author Description --------- ---------- --------------- ------------------------------------ 1.0 24-5-2007 Joop v/d Wege 1. Created this trigger. NOTES: Trigger takes care of filling the context_fmt column with the right value so that only text is really indexed and not base64 rows ******************************************************************************/ BEGIN IF :new.CONTENTENCODING = 'base64' OR :new.CONTENTENCODING is NULL THEN :new.context_fmt := 'ignore'; ELSE :new.context_fmt := 'text'; END IF; END ; / drop index ocfv_cnt; create index ocfv_cnt on "RT_USER"."OBJECTCUSTOMFIELDVALUES" ("LARGECONTENT") indextype is ctxsys.context parameters(' format column context_fmt datastore "CNT_DST" filter "CNT_FIL" section group "CNT_SGP" lexer "CNT_LEX" wordlist "CNT_WDL" stoplist "CNT_SPL" storage "CNT_STO" ') / select * from objectcustomfieldvalues where contains(largecontent,'accounts')>0 /* depending on whether both RT and RTFM are used use this job instead of the */ /* previous one */ BEGIN SYS.DBMS_JOB.REMOVE(21); COMMIT; END; / DECLARE X NUMBER; BEGIN SYS.DBMS_JOB.SUBMIT ( job => X ,what => 'DECLARE stmt VARCHAR2(200); BEGIN stmt := ''ALTER INDEX cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')''; EXECUTE IMMEDIATE stmt; stmt := ''ALTER INDEX ocfv_cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')''; EXECUTE IMMEDIATE stmt; END; ' ,next_date => to_date('25-07-2007 06:00:00','dd/mm/yyyy hh24:mi:ss') ,interval => 'TRUNC(SYSDATE+1)+6/24' ,no_parse => FALSE ); SYS.DBMS_OUTPUT.PUT_LINE('Job Number is: ' || to_char(x)); COMMIT; END; /