OracleText

From Request Tracker Wiki
Jump to navigation Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

This article will describe, in short, what I have done to speed up queries which searches inside email bodies/attachments and RTFM articles which contain large amounts of WikiText.

There are 3 things that need to be done:

  • patch SearchBuilder, to remove pre/post pend of %, change LIKE to contain(Content,'searchstring')>0
  • change RTFM Search.html, remove pre/post pend of % on WikiText Customfields
  • add Oracle Text indexes for Attachments.content and ObjectCustomFieldValues.largecontent

Attached are the file needed todo this, that functionality isn't there anymore ;-( So here they come inline

Start Searchbuilder patch no. 1

--- SearchBuilder.pm.orig	2007-07-07 22:45:00.000000000 +0200
+++ SearchBuilder.pm	2007-07-10 08:49:00.000000000 +0200
@@ -767,9 +767,15 @@

     if ( $args{'FIELD'} ) {

-        #If it's a like, we supply the %s around the search term
+        #If it's a like, we supply the %s around the search term only if its not Oracle
+        #because for Oracle we'll use where contains(content,'text')>1
         if ( $args{'OPERATOR'} =~ /LIKE/i ) {
-            $args{'VALUE'} = "%" . $args{'VALUE'} . "%";
+            if ( $RT::DatabaseType eq 'Oracle') {
+                 $args{'VALUE'} = $args{'VALUE'}
+            }
+            else {
+                $args{'VALUE'} = "%" . $args{'VALUE'} . "%";
+            }
         }
         elsif ( $args{'OPERATOR'} =~ /STARTSWITH/i ) {
             $args{'VALUE'}    = $args{'VALUE'} . "%";
@@ -932,6 +938,49 @@
         value => $args{'VALUE'},
     };

+# Keep the original clause and modify if dbtype is Oracle
+# This patch makes searching for ticket content real fast when you have the appropriate
+# index on Attachments.Content and ObjectCustomFieldValues.Largecontent for RTFM
+# CREATE INDEX CNT ON ATTACHMENTS (CONTENT) INDEXTYPE IS CTXSYS.CONTEXT PARAMETERS('memory 4M');
+# CREATE INDEX FM_LARGE_CNT ON ObjectCustomFieldValues(Largecontent) INDEXTYPE IS CTXSYS.CONTEXT PARAMETERS('memory 4M');
+# parameters is tunable, see Oracle docs ;-)
+# fixed the static replace Attachments_2.Content by dynamic version for both RT and RTFM Content fields
+# also changed RTFM/Article/Search.html NOT to pre and post pend '%'
+# other things needed is a FMT column for Attachments and ObjectCustomFieldValues
+# which takes care of not including base64 text into the index
+# does this by using a trigger which fills the FMT column and using the appropriate
+# options when creating the Text index. (saved 25% of unneeded index room ~1Gb out of ~4Gb)
+    if ( $RT::DatabaseType eq 'Oracle' ) {
+        my ($tmpfield);
+		# First fix RT to use OracleText when searching Ticket Content
+        if ($QualifiedField =~ /lower\(Attachments_\d+\.Content\)/  and $args{'OPERATOR'} eq 'LIKE') {
+			# get rid of the lower since you can't have a functional context index.
+			# don't care about the remaining (), they don't have a significant effect on performance
+			# you can however tell Context to index casesensitive or caseinsensitive.
+			# this is probably not very elegant, excuse my perl ;-)
+     	    $tmpfield = $QualifiedField;
+    	    $tmpfield =~ s/(lower)?//g;
+            $clause = '(contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+        }
+        elsif ($QualifiedField =~ /lower\(Attachments_\d+\.Content\)/  and $args{'OPERATOR'} eq 'NOT LIKE') {
+     	    $tmpfield = $QualifiedField;
+    	    $tmpfield =~ s/(lower)?//g;
+            $clause = '(not contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+        }
+		# Second fix RTFM to use OracleText when searching Article Content and LargeContent
+        elsif ($QualifiedField =~ /ObjectCustomFieldValues_\d+\.Largecontent/ and $args{'OPERATOR'} eq 'LIKE') {
+     	    $tmpfield = $QualifiedField;
+    	    $tmpfield =~ s/(lower)?//g;
+            $clause = '(contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+        }
+		elsif ($QualifiedField =~ /ObjectCustomFieldValues_\d+\.Largecontent/ and $args{'OPERATOR'} eq 'NOT LIKE') {
+     	    $tmpfield = $QualifiedField;
+	   	    $tmpfield =~ s/(lower)?//g;
+            $clause = '(not contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+        }
+
+    }
+
     # Juju because this should come _AFTER_ the EA
     my @prefix;
     if ( $self->{_open_parens}{ $ClauseId } ) {

End Searchbuilder patch no. 1

Start Searchbuilder patch no. 2

--- Handle.pm.orig	2007-04-10 22:48:42.000000000 +0200
+++ Handle.pm	2007-07-12 11:13:00.000000000 +0200
@@ -1101,6 +1101,14 @@
     # of here as we can't parse subclauses
     return 1 if grep $_ ne 'generic_restrictions', keys %{ $args{'SearchBuilder'}->{'subclauses'} };

+# JvdW 2007-07-12
+# Get out of this function if were dealing with either Attachments or ObjectCustomFieldValues
+# since these are modified and parser trips over '(contains(content,'test')>0)
+# haven't seen any side effect but that doesn't mean there aren't any.
+# feel free to fix this lousy perl of mine ;-)
+	return 1 if $args{'ALIAS'} =~ /Attachments_\d+/ ;
+ 	return 1 if $args{'ALIAS'} =~ /ObjectCustomFieldValues_\d+/ ;
+
     # build full list of generic conditions
     my @conditions;
     foreach ( grep @$_, values %{ $args{'SearchBuilder'}->{'restrictions'} } ) {

End Searchbuilder patch no. 2

Start Search.html=== copy share/html/RTFM/Article/Search.hmtl to local/html/RTFM/Article/Search.html edit Search.html (find the first and second occurrence of $value = "%$1%"; and change it to $value = "$1"; leave the third and fourth occurrence alone, they are responsible for searching small string values and don't have a huge impact on performance. ===End Search.html

Start OracleTest part

/* Direct_datastore is for text in 1 column further no attributes */
 
 begin
   ctx_ddl.drop_preference('"CNT_DST"');
 end;
 
 begin
   ctx_ddl.create_preference('"CNT_DST"','DIRECT_DATASTORE');
 end;
 /
 
 /* NULL_FILTER because we only want text indexing or auto_filter because*/
 /* we add an extra fmt column to Attachments which holds a flag whether */
 /* that row is indexed or not */
 
 begin
   ctx_ddl.drop_preference('"CNT_FIL"');
 end;
 /
 begin
   ctx_ddl.create_preference('"CNT_FIL"','AUTO_FILTER');
 end;
 /
 
 begin
   ctx_ddl.drop_section_group('"CNT_SGP"');
 end;
 /
 begin
   ctx_ddl.create_section_group('"CNT_SGP"','NULL_SECTION_GROUP');
 end;
 /
 
 /* BASIC_LEXER is sufficient because we only index English/Dutch  */
 /* Its wise to set some attributes, look at Oracle Docs for which */
 begin
   ctx_ddl.drop_preference('"CNT_LEX"');
 end;
 /
 begin
   ctx_ddl.create_preference('"CNT_LEX"','BASIC_LEXER');
   ctx_ddl.set_attribute('CNT_LEX', 'composite', 'DUTCH');
   ctx_ddl.set_attribute('CNT_LEX', 'index_themes', 'NO');
   ctx_ddl.set_attribute('CNT_LEX', 'index_stems', 'DUTCH');
   ctx_ddl.set_attribute('CNT_LEX', 'index_text', 'YES');
   ctx_ddl.set_attribute('CNT_LEX', 'printjoins', '_-.');
 end;
 /
 
 /* BASIC_WORDLIST to set properties of stop words            */
 /* in my case I have added Dutch and English, edit to needs  */
 begin
   ctx_ddl.drop_preference('"CNT_WDL"');
 end;
 /
 begin
   ctx_ddl.create_preference('"CNT_WDL"','BASIC_WORDLIST');
   ctx_ddl.set_attribute('"CNT_WDL"','STEMMER','DUTCH');
   ctx_ddl.set_attribute('"CNT_WDL"','FUZZY_MATCH','DUTCH');
 end;
 /
 
 begin
   ctx_ddl.drop_stoplist('"CNT_SPL"');
 end;
 /
 begin
   ctx_ddl.create_stoplist('"CNT_SPL"','BASIC_STOPLIST');
   ctx_ddl.add_stopword('"CNT_SPL"','Corp');
   ctx_ddl.add_stopword('"CNT_SPL"','Mr');
   ctx_ddl.add_stopword('"CNT_SPL"','Mrs');
   ctx_ddl.add_stopword('"CNT_SPL"','Ms');
   ctx_ddl.add_stopword('"CNT_SPL"','Mz');
   ctx_ddl.add_stopword('"CNT_SPL"','a');
   ctx_ddl.add_stopword('"CNT_SPL"','about');
   ctx_ddl.add_stopword('"CNT_SPL"','after');
   ctx_ddl.add_stopword('"CNT_SPL"','all');
   ctx_ddl.add_stopword('"CNT_SPL"','also');
   ctx_ddl.add_stopword('"CNT_SPL"','an');
   ctx_ddl.add_stopword('"CNT_SPL"','and');
   ctx_ddl.add_stopword('"CNT_SPL"','any');
   ctx_ddl.add_stopword('"CNT_SPL"','are');
   ctx_ddl.add_stopword('"CNT_SPL"','as');
   ctx_ddl.add_stopword('"CNT_SPL"','at');
   ctx_ddl.add_stopword('"CNT_SPL"','be');
   ctx_ddl.add_stopword('"CNT_SPL"','because');
   ctx_ddl.add_stopword('"CNT_SPL"','been');
   ctx_ddl.add_stopword('"CNT_SPL"','but');
   ctx_ddl.add_stopword('"CNT_SPL"','by');
   ctx_ddl.add_stopword('"CNT_SPL"','can');
   ctx_ddl.add_stopword('"CNT_SPL"','co');
   ctx_ddl.add_stopword('"CNT_SPL"','could');
   ctx_ddl.add_stopword('"CNT_SPL"','for');
   ctx_ddl.add_stopword('"CNT_SPL"','from');
   ctx_ddl.add_stopword('"CNT_SPL"','had');
   ctx_ddl.add_stopword('"CNT_SPL"','has');
   ctx_ddl.add_stopword('"CNT_SPL"','have');
   ctx_ddl.add_stopword('"CNT_SPL"','he');
   ctx_ddl.add_stopword('"CNT_SPL"','her');
   ctx_ddl.add_stopword('"CNT_SPL"','his');
   ctx_ddl.add_stopword('"CNT_SPL"','if');
   ctx_ddl.add_stopword('"CNT_SPL"','in');
   ctx_ddl.add_stopword('"CNT_SPL"','inc');
   ctx_ddl.add_stopword('"CNT_SPL"','into');
   ctx_ddl.add_stopword('"CNT_SPL"','is');
   ctx_ddl.add_stopword('"CNT_SPL"','it');
   ctx_ddl.add_stopword('"CNT_SPL"','its');
   ctx_ddl.add_stopword('"CNT_SPL"','last');
   ctx_ddl.add_stopword('"CNT_SPL"','more');
   ctx_ddl.add_stopword('"CNT_SPL"','most');
   ctx_ddl.add_stopword('"CNT_SPL"','no');
   ctx_ddl.add_stopword('"CNT_SPL"','not');
   ctx_ddl.add_stopword('"CNT_SPL"','of');
   ctx_ddl.add_stopword('"CNT_SPL"','on');
   ctx_ddl.add_stopword('"CNT_SPL"','one');
   ctx_ddl.add_stopword('"CNT_SPL"','only');
   ctx_ddl.add_stopword('"CNT_SPL"','or');
   ctx_ddl.add_stopword('"CNT_SPL"','other');
   ctx_ddl.add_stopword('"CNT_SPL"','out');
   ctx_ddl.add_stopword('"CNT_SPL"','over');
   ctx_ddl.add_stopword('"CNT_SPL"','s');
   ctx_ddl.add_stopword('"CNT_SPL"','says');
   ctx_ddl.add_stopword('"CNT_SPL"','she');
   ctx_ddl.add_stopword('"CNT_SPL"','so');
   ctx_ddl.add_stopword('"CNT_SPL"','some');
   ctx_ddl.add_stopword('"CNT_SPL"','such');
   ctx_ddl.add_stopword('"CNT_SPL"','than');
   ctx_ddl.add_stopword('"CNT_SPL"','that');
   ctx_ddl.add_stopword('"CNT_SPL"','the');
   ctx_ddl.add_stopword('"CNT_SPL"','their');
   ctx_ddl.add_stopword('"CNT_SPL"','there');
   ctx_ddl.add_stopword('"CNT_SPL"','they');
   ctx_ddl.add_stopword('"CNT_SPL"','this');
   ctx_ddl.add_stopword('"CNT_SPL"','to');
   ctx_ddl.add_stopword('"CNT_SPL"','up');
   ctx_ddl.add_stopword('"CNT_SPL"','was');
   ctx_ddl.add_stopword('"CNT_SPL"','we');
   ctx_ddl.add_stopword('"CNT_SPL"','were');
   ctx_ddl.add_stopword('"CNT_SPL"','when');
   ctx_ddl.add_stopword('"CNT_SPL"','which');
   ctx_ddl.add_stopword('"CNT_SPL"','who');
   ctx_ddl.add_stopword('"CNT_SPL"','will');
   ctx_ddl.add_stopword('"CNT_SPL"','with');
   ctx_ddl.add_stopword('"CNT_SPL"','would');
   ctx_ddl.add_stopword('"CNT_SPL"','aan');
   ctx_ddl.add_stopword('"CNT_SPL"','aangaande');
   ctx_ddl.add_stopword('"CNT_SPL"','aangezien');
   ctx_ddl.add_stopword('"CNT_SPL"','achter');
   ctx_ddl.add_stopword('"CNT_SPL"','achterna');
   ctx_ddl.add_stopword('"CNT_SPL"','afgelopen');
   ctx_ddl.add_stopword('"CNT_SPL"','al');
   ctx_ddl.add_stopword('"CNT_SPL"','aldaar');
   ctx_ddl.add_stopword('"CNT_SPL"','aldus');
   ctx_ddl.add_stopword('"CNT_SPL"','alhoewel');
   ctx_ddl.add_stopword('"CNT_SPL"','alias');
   ctx_ddl.add_stopword('"CNT_SPL"','alle');
   ctx_ddl.add_stopword('"CNT_SPL"','allebei');
   ctx_ddl.add_stopword('"CNT_SPL"','alleen');
   ctx_ddl.add_stopword('"CNT_SPL"','alsnog');
   ctx_ddl.add_stopword('"CNT_SPL"','altijd');
   ctx_ddl.add_stopword('"CNT_SPL"','altoos');
   ctx_ddl.add_stopword('"CNT_SPL"','ander');
   ctx_ddl.add_stopword('"CNT_SPL"','andere');
   ctx_ddl.add_stopword('"CNT_SPL"','anders');
   ctx_ddl.add_stopword('"CNT_SPL"','anderszins');
   ctx_ddl.add_stopword('"CNT_SPL"','behalve');
   ctx_ddl.add_stopword('"CNT_SPL"','behoudens');
   ctx_ddl.add_stopword('"CNT_SPL"','beide');
   ctx_ddl.add_stopword('"CNT_SPL"','beiden');
   ctx_ddl.add_stopword('"CNT_SPL"','ben');
   ctx_ddl.add_stopword('"CNT_SPL"','beneden');
   ctx_ddl.add_stopword('"CNT_SPL"','bent');
   ctx_ddl.add_stopword('"CNT_SPL"','bepaald');
   ctx_ddl.add_stopword('"CNT_SPL"','betreffende');
   ctx_ddl.add_stopword('"CNT_SPL"','bij');
   ctx_ddl.add_stopword('"CNT_SPL"','binnen');
   ctx_ddl.add_stopword('"CNT_SPL"','binnenin');
   ctx_ddl.add_stopword('"CNT_SPL"','boven');
   ctx_ddl.add_stopword('"CNT_SPL"','bovenal');
   ctx_ddl.add_stopword('"CNT_SPL"','bovendien');
   ctx_ddl.add_stopword('"CNT_SPL"','bovengenoemd');
   ctx_ddl.add_stopword('"CNT_SPL"','bovenstaand');
   ctx_ddl.add_stopword('"CNT_SPL"','bovenvermeld');
   ctx_ddl.add_stopword('"CNT_SPL"','buiten');
   ctx_ddl.add_stopword('"CNT_SPL"','daar');
   ctx_ddl.add_stopword('"CNT_SPL"','daarheen');
   ctx_ddl.add_stopword('"CNT_SPL"','daarin');
   ctx_ddl.add_stopword('"CNT_SPL"','daarna');
   ctx_ddl.add_stopword('"CNT_SPL"','daarnet');
   ctx_ddl.add_stopword('"CNT_SPL"','daarom');
   ctx_ddl.add_stopword('"CNT_SPL"','daarop');
   ctx_ddl.add_stopword('"CNT_SPL"','daarvanlangs');
   ctx_ddl.add_stopword('"CNT_SPL"','dan');
   ctx_ddl.add_stopword('"CNT_SPL"','dat');
   ctx_ddl.add_stopword('"CNT_SPL"','de');
   ctx_ddl.add_stopword('"CNT_SPL"','die');
   ctx_ddl.add_stopword('"CNT_SPL"','dikwijls');
   ctx_ddl.add_stopword('"CNT_SPL"','dit');
   ctx_ddl.add_stopword('"CNT_SPL"','door');
   ctx_ddl.add_stopword('"CNT_SPL"','doorgaand');
   ctx_ddl.add_stopword('"CNT_SPL"','dus');
   ctx_ddl.add_stopword('"CNT_SPL"','echter');
   ctx_ddl.add_stopword('"CNT_SPL"','eer');
   ctx_ddl.add_stopword('"CNT_SPL"','eerdat');
   ctx_ddl.add_stopword('"CNT_SPL"','eerder');
   ctx_ddl.add_stopword('"CNT_SPL"','eerlang');
   ctx_ddl.add_stopword('"CNT_SPL"','eerst');
   ctx_ddl.add_stopword('"CNT_SPL"','elk');
   ctx_ddl.add_stopword('"CNT_SPL"','elke');
   ctx_ddl.add_stopword('"CNT_SPL"','en');
   ctx_ddl.add_stopword('"CNT_SPL"','enig');
   ctx_ddl.add_stopword('"CNT_SPL"','enigszins');
   ctx_ddl.add_stopword('"CNT_SPL"','enkel');
   ctx_ddl.add_stopword('"CNT_SPL"','er');
   ctx_ddl.add_stopword('"CNT_SPL"','erdoor');
   ctx_ddl.add_stopword('"CNT_SPL"','even');
   ctx_ddl.add_stopword('"CNT_SPL"','eveneens');
   ctx_ddl.add_stopword('"CNT_SPL"','evenwel');
   ctx_ddl.add_stopword('"CNT_SPL"','gauw');
   ctx_ddl.add_stopword('"CNT_SPL"','gedurende');
   ctx_ddl.add_stopword('"CNT_SPL"','geen');
   ctx_ddl.add_stopword('"CNT_SPL"','gehad');
   ctx_ddl.add_stopword('"CNT_SPL"','gekund');
   ctx_ddl.add_stopword('"CNT_SPL"','geleden');
   ctx_ddl.add_stopword('"CNT_SPL"','gelijk');
   ctx_ddl.add_stopword('"CNT_SPL"','gemoeten');
   ctx_ddl.add_stopword('"CNT_SPL"','gemogen');
   ctx_ddl.add_stopword('"CNT_SPL"','geweest');
   ctx_ddl.add_stopword('"CNT_SPL"','gewoon');
   ctx_ddl.add_stopword('"CNT_SPL"','gewoonweg');
   ctx_ddl.add_stopword('"CNT_SPL"','haar');
   ctx_ddl.add_stopword('"CNT_SPL"','hadden');
   ctx_ddl.add_stopword('"CNT_SPL"','hare');
   ctx_ddl.add_stopword('"CNT_SPL"','heb');
   ctx_ddl.add_stopword('"CNT_SPL"','hebben');
   ctx_ddl.add_stopword('"CNT_SPL"','hebt');
   ctx_ddl.add_stopword('"CNT_SPL"','heeft');
   ctx_ddl.add_stopword('"CNT_SPL"','hem');
   ctx_ddl.add_stopword('"CNT_SPL"','hen');
   ctx_ddl.add_stopword('"CNT_SPL"','het');
   ctx_ddl.add_stopword('"CNT_SPL"','hierbeneden');
   ctx_ddl.add_stopword('"CNT_SPL"','hierboven');
   ctx_ddl.add_stopword('"CNT_SPL"','hij');
   ctx_ddl.add_stopword('"CNT_SPL"','hoe');
   ctx_ddl.add_stopword('"CNT_SPL"','hoewel');
   ctx_ddl.add_stopword('"CNT_SPL"','hun');
   ctx_ddl.add_stopword('"CNT_SPL"','hunne');
   ctx_ddl.add_stopword('"CNT_SPL"','ik');
   ctx_ddl.add_stopword('"CNT_SPL"','ikzelf');
 /*  ctx_ddl.add_stopword('"CNT_SPL"','in');*/
   ctx_ddl.add_stopword('"CNT_SPL"','inmiddels');
   ctx_ddl.add_stopword('"CNT_SPL"','inzake');
 /*  ctx_ddl.add_stopword('"CNT_SPL"','is');*/
   ctx_ddl.add_stopword('"CNT_SPL"','jezelf');
   ctx_ddl.add_stopword('"CNT_SPL"','jij');
   ctx_ddl.add_stopword('"CNT_SPL"','jijzelf');
   ctx_ddl.add_stopword('"CNT_SPL"','jou');
   ctx_ddl.add_stopword('"CNT_SPL"','jouw');
   ctx_ddl.add_stopword('"CNT_SPL"','jouwe');
   ctx_ddl.add_stopword('"CNT_SPL"','juist');
   ctx_ddl.add_stopword('"CNT_SPL"','jullie');
   ctx_ddl.add_stopword('"CNT_SPL"','kan');
   ctx_ddl.add_stopword('"CNT_SPL"','klaar');
   ctx_ddl.add_stopword('"CNT_SPL"','kon');
   ctx_ddl.add_stopword('"CNT_SPL"','konden');
   ctx_ddl.add_stopword('"CNT_SPL"','krachtens');
   ctx_ddl.add_stopword('"CNT_SPL"','kunnen');
   ctx_ddl.add_stopword('"CNT_SPL"','kunt');
   ctx_ddl.add_stopword('"CNT_SPL"','later');
   ctx_ddl.add_stopword('"CNT_SPL"','liever');
   ctx_ddl.add_stopword('"CNT_SPL"','maar');
   ctx_ddl.add_stopword('"CNT_SPL"','mag');
   ctx_ddl.add_stopword('"CNT_SPL"','meer');
   ctx_ddl.add_stopword('"CNT_SPL"','met');
   ctx_ddl.add_stopword('"CNT_SPL"','mezelf');
   ctx_ddl.add_stopword('"CNT_SPL"','mij');
   ctx_ddl.add_stopword('"CNT_SPL"','mijn');
   ctx_ddl.add_stopword('"CNT_SPL"','mijnent');
   ctx_ddl.add_stopword('"CNT_SPL"','mijner');
   ctx_ddl.add_stopword('"CNT_SPL"','mijzelf');
   ctx_ddl.add_stopword('"CNT_SPL"','misschien');
   ctx_ddl.add_stopword('"CNT_SPL"','mocht');
   ctx_ddl.add_stopword('"CNT_SPL"','mochten');
   ctx_ddl.add_stopword('"CNT_SPL"','moest');
   ctx_ddl.add_stopword('"CNT_SPL"','moesten');
   ctx_ddl.add_stopword('"CNT_SPL"','moet');
   ctx_ddl.add_stopword('"CNT_SPL"','moeten');
   ctx_ddl.add_stopword('"CNT_SPL"','mogen');
   ctx_ddl.add_stopword('"CNT_SPL"','na');
   ctx_ddl.add_stopword('"CNT_SPL"','naar');
   ctx_ddl.add_stopword('"CNT_SPL"','nadat');
   ctx_ddl.add_stopword('"CNT_SPL"','net');
   ctx_ddl.add_stopword('"CNT_SPL"','niet');
   ctx_ddl.add_stopword('"CNT_SPL"','noch');
   ctx_ddl.add_stopword('"CNT_SPL"','nog');
   ctx_ddl.add_stopword('"CNT_SPL"','nogal');
   ctx_ddl.add_stopword('"CNT_SPL"','nu');
 /*  ctx_ddl.add_stopword('"CNT_SPL"','of');*/
   ctx_ddl.add_stopword('"CNT_SPL"','ofschoon');
   ctx_ddl.add_stopword('"CNT_SPL"','om');
   ctx_ddl.add_stopword('"CNT_SPL"','omdat');
   ctx_ddl.add_stopword('"CNT_SPL"','omhoog');
   ctx_ddl.add_stopword('"CNT_SPL"','omlaag');
   ctx_ddl.add_stopword('"CNT_SPL"','omstreeks');
   ctx_ddl.add_stopword('"CNT_SPL"','omtrent');
   ctx_ddl.add_stopword('"CNT_SPL"','omver');
   ctx_ddl.add_stopword('"CNT_SPL"','onder');
   ctx_ddl.add_stopword('"CNT_SPL"','ondertussen');
   ctx_ddl.add_stopword('"CNT_SPL"','ongeveer');
   ctx_ddl.add_stopword('"CNT_SPL"','ons');
   ctx_ddl.add_stopword('"CNT_SPL"','onszelf');
   ctx_ddl.add_stopword('"CNT_SPL"','onze');
   ctx_ddl.add_stopword('"CNT_SPL"','ook');
   ctx_ddl.add_stopword('"CNT_SPL"','op');
   ctx_ddl.add_stopword('"CNT_SPL"','opnieuw');
   ctx_ddl.add_stopword('"CNT_SPL"','opzij');
 /*  ctx_ddl.add_stopword('"CNT_SPL"','over');*/
   ctx_ddl.add_stopword('"CNT_SPL"','overeind');
   ctx_ddl.add_stopword('"CNT_SPL"','overigens');
   ctx_ddl.add_stopword('"CNT_SPL"','pas');
   ctx_ddl.add_stopword('"CNT_SPL"','precies');
   ctx_ddl.add_stopword('"CNT_SPL"','reeds');
   ctx_ddl.add_stopword('"CNT_SPL"','rond');
   ctx_ddl.add_stopword('"CNT_SPL"','rondom');
   ctx_ddl.add_stopword('"CNT_SPL"','sedert');
   ctx_ddl.add_stopword('"CNT_SPL"','sinds');
   ctx_ddl.add_stopword('"CNT_SPL"','sindsdien');
   ctx_ddl.add_stopword('"CNT_SPL"','slechts');
   ctx_ddl.add_stopword('"CNT_SPL"','sommige');
   ctx_ddl.add_stopword('"CNT_SPL"','spoedig');
   ctx_ddl.add_stopword('"CNT_SPL"','steeds');
   ctx_ddl.add_stopword('"CNT_SPL"','tamelijk');
   ctx_ddl.add_stopword('"CNT_SPL"','tenzij');
   ctx_ddl.add_stopword('"CNT_SPL"','terwijl');
   ctx_ddl.add_stopword('"CNT_SPL"','thans');
   ctx_ddl.add_stopword('"CNT_SPL"','tijdens');
   ctx_ddl.add_stopword('"CNT_SPL"','toch');
   ctx_ddl.add_stopword('"CNT_SPL"','toen');
   ctx_ddl.add_stopword('"CNT_SPL"','toenmaals');
   ctx_ddl.add_stopword('"CNT_SPL"','toenmalig');
   ctx_ddl.add_stopword('"CNT_SPL"','tot');
   ctx_ddl.add_stopword('"CNT_SPL"','totdat');
   ctx_ddl.add_stopword('"CNT_SPL"','tussen');
   ctx_ddl.add_stopword('"CNT_SPL"','uit');
   ctx_ddl.add_stopword('"CNT_SPL"','uitgezonderd');
   ctx_ddl.add_stopword('"CNT_SPL"','vaak');
   ctx_ddl.add_stopword('"CNT_SPL"','van');
   ctx_ddl.add_stopword('"CNT_SPL"','vandaan');
   ctx_ddl.add_stopword('"CNT_SPL"','vanuit');
   ctx_ddl.add_stopword('"CNT_SPL"','vanwege');
   ctx_ddl.add_stopword('"CNT_SPL"','veeleer');
   ctx_ddl.add_stopword('"CNT_SPL"','verder');
   ctx_ddl.add_stopword('"CNT_SPL"','vervolgens');
   ctx_ddl.add_stopword('"CNT_SPL"','vol');
   ctx_ddl.add_stopword('"CNT_SPL"','volgens');
   ctx_ddl.add_stopword('"CNT_SPL"','voor');
   ctx_ddl.add_stopword('"CNT_SPL"','vooraf');
   ctx_ddl.add_stopword('"CNT_SPL"','vooral');
   ctx_ddl.add_stopword('"CNT_SPL"','vooralsnog');
   ctx_ddl.add_stopword('"CNT_SPL"','voorbij');
   ctx_ddl.add_stopword('"CNT_SPL"','voordat');
   ctx_ddl.add_stopword('"CNT_SPL"','voordezen');
   ctx_ddl.add_stopword('"CNT_SPL"','voordien');
   ctx_ddl.add_stopword('"CNT_SPL"','voorheen');
   ctx_ddl.add_stopword('"CNT_SPL"','voorop');
   ctx_ddl.add_stopword('"CNT_SPL"','vooruit');
   ctx_ddl.add_stopword('"CNT_SPL"','vrij');
   ctx_ddl.add_stopword('"CNT_SPL"','vroeg');
   ctx_ddl.add_stopword('"CNT_SPL"','waar');
   ctx_ddl.add_stopword('"CNT_SPL"','waarom');
   ctx_ddl.add_stopword('"CNT_SPL"','wanneer');
   ctx_ddl.add_stopword('"CNT_SPL"','want');
   ctx_ddl.add_stopword('"CNT_SPL"','waren');
 /*  ctx_ddl.add_stopword('"CNT_SPL"','was');*/
   ctx_ddl.add_stopword('"CNT_SPL"','wat');
   ctx_ddl.add_stopword('"CNT_SPL"','weer');
   ctx_ddl.add_stopword('"CNT_SPL"','weg');
   ctx_ddl.add_stopword('"CNT_SPL"','wegens');
   ctx_ddl.add_stopword('"CNT_SPL"','wel');
   ctx_ddl.add_stopword('"CNT_SPL"','weldra');
   ctx_ddl.add_stopword('"CNT_SPL"','welk');
   ctx_ddl.add_stopword('"CNT_SPL"','welke');
   ctx_ddl.add_stopword('"CNT_SPL"','wie');
   ctx_ddl.add_stopword('"CNT_SPL"','wiens');
   ctx_ddl.add_stopword('"CNT_SPL"','wier');
   ctx_ddl.add_stopword('"CNT_SPL"','wij');
   ctx_ddl.add_stopword('"CNT_SPL"','wijzelf');
   ctx_ddl.add_stopword('"CNT_SPL"','zal');
   ctx_ddl.add_stopword('"CNT_SPL"','ze');
   ctx_ddl.add_stopword('"CNT_SPL"','zelfs');
   ctx_ddl.add_stopword('"CNT_SPL"','zichzelf');
   ctx_ddl.add_stopword('"CNT_SPL"','zij');
   ctx_ddl.add_stopword('"CNT_SPL"','zijn');
   ctx_ddl.add_stopword('"CNT_SPL"','zijne');
   ctx_ddl.add_stopword('"CNT_SPL"','zo');
   ctx_ddl.add_stopword('"CNT_SPL"','zodra');
   ctx_ddl.add_stopword('"CNT_SPL"','zonder');
   ctx_ddl.add_stopword('"CNT_SPL"','zou');
   ctx_ddl.add_stopword('"CNT_SPL"','zouden');
   ctx_ddl.add_stopword('"CNT_SPL"','zowat');
   ctx_ddl.add_stopword('"CNT_SPL"','zulke');
   ctx_ddl.add_stopword('"CNT_SPL"','zullen');
   ctx_ddl.add_stopword('"CNT_SPL"','zult');
 end;
 /
 
 begin
   ctx_ddl.drop_preference('"CNT_STO"');
 end;
 /
 begin
   ctx_ddl.create_preference('"CNT_STO"','BASIC_STORAGE');
   ctx_ddl.set_attribute('"CNT_STO"','R_TABLE_CLAUSE','lob (data) store as (cache
 )');
   ctx_ddl.set_attribute('"CNT_STO"','I_INDEX_CLAUSE','compress 2');
 end;
 /
 
 /* Debug/progress check, will log in $ORACLE_HOME/ctx/log */
 begin
   ctx_output.start_log('CNT_LOG');
 end;
 /
 
 /* If this is a production database, fill in the added column */
 /* with the correct info to rebuild the index later           */
 update attachments
 set fmt='ignore'
 where
 contentencoding is NULL
 
 update attachments
 set fmt='ignore'
 where
 contentencoding = 'base64'
 
 update attachments
 set fmt='text'
 where
 contentencoding = 'none'
 
 update attachments
 set fmt='text'
 where
 contentencoding = 'quoted-printable'
 
 
 /* Extent the Attachments table with an extra column */
 /* This needs te be done only once                   */
 ALTER TABLE RT_USER.ATTACHMENTS
  ADD (context_fmt  VARCHAR2(10));
 
 drop index cnt;
 
 create index "RT_USER"."CNT"
   on "RT_USER"."ATTACHMENTS"
       ("CONTENT")
   indextype is ctxsys.context
   parameters('
     format column    context_fmt
     datastore       "CNT_DST"
     filter          "CNT_FIL"
     section group   "CNT_SGP"
     lexer           "CNT_LEX"
     wordlist        "CNT_WDL"
     stoplist        "CNT_SPL"
     storage         "CNT_STO"
   ')
 /
 
 begin
   ctx_output.end_log;
 end;
 /
 
 
 /* Add a trigger so that new rows are properly tagged */
 CREATE OR REPLACE TRIGGER RT_USER.BI_Att
 BEFORE INSERT
 ON RT_USER.ATTACHMENTS
 REFERENCING NEW AS New OLD AS Old
 FOR EACH ROW
 DECLARE
 /******************************************************************************
    NAME:
    PURPOSE:
 
    REVISIONS:
    Ver        Date        Author           Description
    ---------  ----------  ---------------  ------------------------------------
    1.0        24-5-2007   Joop v/d Wege   1. Created this trigger.
 
    NOTES:
    Trigger takes care of filling the context_fmt column with the right
    value so that only text is really indexed and not base64 rows
 
 ******************************************************************************/
 BEGIN
 
     IF :new.CONTENTENCODING = 'base64' OR :new.CONTENTENCODING is NULL THEN
        :new.context_fmt := 'ignore';
     ELSE
         :new.context_fmt := 'text';
     END IF;
 
 END ;
 /
 
 
 /* There needs to be a job that on a regular basis refreshes the index */
 BEGIN
   SYS.DBMS_JOB.REMOVE(21);
 COMMIT;
 END;
 /
 
 DECLARE
   X NUMBER;
 BEGIN
   SYS.DBMS_JOB.SUBMIT
   ( job       => X
    ,what      => 'DECLARE
    stmt VARCHAR2(200);
 BEGIN
    stmt := ''ALTER INDEX cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')'';
    EXECUTE IMMEDIATE stmt;
 END;
 '
    ,next_date => to_date('20-07-2007 06:00:00','dd/mm/yyyy hh24:mi:ss')
    ,interval  => 'TRUNC(SYSDATE+1)+6/24'
    ,no_parse  => FALSE
   );
   SYS.DBMS_OUTPUT.PUT_LINE('Job Number is: ' || to_char(x));
 COMMIT;
 END;
 /
 
 /* test if things work                                                      */
 /* first run is slow, second should be fast <500ms)                         */
 select * from attachments where contains(content,'accounts')>0;
 
 /****************************************************************************/
 /* Repeat for ObjectCustomFieldValues                                       */
 /****************************************************************************/
 
 alter table objectcustomfieldvalues add (context_fmt varchar2(10));
 
 update objectcustomfieldvalues
 set context_fmt='ignore'
 where
 contentencoding is NULL;
 
 update objectcustomfieldvalues
 set context_fmt='ignore'
 where
 contentencoding = 'base64';
 
 update objectcustomfieldvalues
 set context_fmt='text'
 where
 contentencoding = 'none';
 
 update objectcustomfieldvalues
 set context_fmt='text'
 where
 contentencoding = 'quoted-printable';
 
 
 CREATE OR REPLACE TRIGGER RT_USER.BI_OCFV
 BEFORE INSERT
 ON RT_USER.objectcustomfieldvalues
 REFERENCING NEW AS New OLD AS Old
 FOR EACH ROW
 DECLARE
 /******************************************************************************
    NAME:
    PURPOSE:
 
    REVISIONS:
    Ver        Date        Author           Description
    ---------  ----------  ---------------  ------------------------------------
    1.0        24-5-2007   Joop v/d Wege   1. Created this trigger.
 
    NOTES:
    Trigger takes care of filling the context_fmt column with the right
    value so that only text is really indexed and not base64 rows
 
 ******************************************************************************/
 BEGIN
 
     IF :new.CONTENTENCODING = 'base64' OR :new.CONTENTENCODING is NULL THEN
        :new.context_fmt := 'ignore';
     ELSE
         :new.context_fmt := 'text';
     END IF;
 
 END ;
 /
 
 
 drop index ocfv_cnt;
 
 create index ocfv_cnt
   on "RT_USER"."OBJECTCUSTOMFIELDVALUES"
       ("LARGECONTENT")
   indextype is ctxsys.context
   parameters('
     format column    context_fmt
     datastore       "CNT_DST"
     filter          "CNT_FIL"
     section group   "CNT_SGP"
     lexer           "CNT_LEX"
     wordlist        "CNT_WDL"
     stoplist        "CNT_SPL"
     storage         "CNT_STO"
   ')
 /
 
 
 select * from objectcustomfieldvalues where contains(largecontent,'accounts')>0
 
 /* depending on whether both RT and RTFM are used use this job instead of the */
 /* previous one                                                               */
 BEGIN
   SYS.DBMS_JOB.REMOVE(21);
 COMMIT;
 END;
 /
 
 DECLARE
   X NUMBER;
 BEGIN
   SYS.DBMS_JOB.SUBMIT
   ( job       => X
    ,what      => 'DECLARE
    stmt VARCHAR2(200);
 BEGIN
    stmt := ''ALTER INDEX cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')'';
    EXECUTE IMMEDIATE stmt;
    stmt := ''ALTER INDEX ocfv_cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')'';
    EXECUTE IMMEDIATE stmt;
 
 END;
 '
    ,next_date => to_date('25-07-2007 06:00:00','dd/mm/yyyy hh24:mi:ss')
    ,interval  => 'TRUNC(SYSDATE+1)+6/24'
    ,no_parse  => FALSE
   );
   SYS.DBMS_OUTPUT.PUT_LINE('Job Number is: ' || to_char(x));
 COMMIT;
 END;
 /
 

End OracleTest part