Difference between revisions of "Creating Indices at the VO Level"

From Gcube Wiki
Jump to: navigation, search
(Creating a Full Text and a Forward Index for a collection with two views)
Line 555: Line 555:
  
 
Note, in the Index Type above, that we also need to define the default "gDocCollectionID" and "gDocCollectionLang" fields, although there are not referenced in the XSLTs(the are automatically added by the [[ Data_Transformation | DTS ]] ).
 
Note, in the Index Type above, that we also need to define the default "gDocCollectionID" and "gDocCollectionLang" fields, although there are not referenced in the XSLTs(the are automatically added by the [[ Data_Transformation | DTS ]] ).
Then we will create two generic resources for the Forward Rowset XSLTs for ES and DC schemas:
+
Then we will create the generic resources for the Forward Index. We will present a simple case here and we will use only the ES payload to create the Forward Rowsets. The following Rowset XSLT extracts the "title" and "creator" fields from the ES payload:
  
 
<source lang="xml">
 
<source lang="xml">
  
 +
<Resource version="0.4.x">
 +
   
 +
  <ID>848f00c0-c7a1-11de-bb0e-e5f08ba8a01a</ID>
 +
   
 +
  <Type>GenericResource</Type>
 +
   
 +
  <Scopes>
 +
       
 +
      <Scope>/d4science.research-infrastructures.eu/Ecosystem</Scope>
 +
   
 +
  </Scopes>
 +
   
 +
  <Profile>
 +
       
 +
      <SecondaryType>MetadataBrokerXSLT</SecondaryType>
 +
       
 +
      <Name>BrokerXSLT_es_anylanguage_to_fwRowset_anylanguage_title_creator</Name>
 +
       
 +
      <Description>XSLT to transform from es schema to forward rowset schema including the 'title' and 'creator' fields (suitable for feeding the forward index).</Description>
 +
       
 +
      <Body>
 +
           
 +
        <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
 +
               
 +
            <xsl:output xmlns:xsl="http://www.w3.org/1999/XSL/Transform" indent="yes" method="xml" omit-xml-declaration="yes" />
 +
               
 +
            <xsl:variable xmlns:xsl="http://www.w3.org/1999/XSL/Transform" name="keys">
 +
                   
 +
              <key xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 +
                       
 +
                  <keyName xmlns:xsl="http://www.w3.org/1999/XSL/Transform">title</keyName>
 +
                       
 +
                  <keyXPath xmlns:xsl="http://www.w3.org/1999/XSL/Transform">//title</keyXPath>
 +
                   
 +
              </key>
 +
                   
 +
              <key xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 +
                       
 +
                  <keyName xmlns:xsl="http://www.w3.org/1999/XSL/Transform">creator</keyName>
 +
                       
 +
                  <keyXPath xmlns:xsl="http://www.w3.org/1999/XSL/Transform">//creator</keyXPath>
 +
                   
 +
              </key>
 +
               
 +
            </xsl:variable>
 +
               
 +
            <xsl:template xmlns:xsl="http://www.w3.org/1999/XSL/Transform" match="/">
 +
                   
 +
              <xsl:element xmlns:xsl="http://www.w3.org/1999/XSL/Transform" name="ROWSET">
 +
                       
 +
                  <xsl:element xmlns:xsl="http://www.w3.org/1999/XSL/Transform" name="INSERT">
 +
                           
 +
                    <xsl:element xmlns:xsl="http://www.w3.org/1999/XSL/Transform" name="TUPLE">
 +
                               
 +
                        <xsl:element xmlns:xsl="http://www.w3.org/1999/XSL/Transform" name="VALUE">
 +
                                   
 +
                          <xsl:for-each xmlns:xsl="http://www.w3.org/1999/XSL/Transform" select="//title">
 +
                                       
 +
                              <xsl:if xmlns:xsl="http://www.w3.org/1999/XSL/Transform" test="normalize-space(.)">
 +
                                           
 +
                                <FIELD xmlns:xsl="http://www.w3.org/1999/XSL/Transform" name="title">
 +
                                               
 +
                                    <xsl:value-of xmlns:xsl="http://www.w3.org/1999/XSL/Transform" select="normalize-space(.)" />
 +
                                           
 +
                                </FIELD>
 +
                                       
 +
                              </xsl:if>
 +
                                   
 +
                          </xsl:for-each>
 +
                                   
 +
                          <xsl:for-each xmlns:xsl="http://www.w3.org/1999/XSL/Transform" select="//creator">
 +
                                       
 +
                              <xsl:if xmlns:xsl="http://www.w3.org/1999/XSL/Transform" test="normalize-space(.)">
 +
                                           
 +
                                <FIELD xmlns:xsl="http://www.w3.org/1999/XSL/Transform" name="creator">
 +
                                               
 +
                                    <xsl:value-of xmlns:xsl="http://www.w3.org/1999/XSL/Transform" select="normalize-space(.)" />
 +
                                           
 +
                                </FIELD>
 +
                                       
 +
                              </xsl:if>
 +
                                   
 +
                          </xsl:for-each>
 +
                               
 +
                        </xsl:element>
 +
                           
 +
                    </xsl:element>
 +
                       
 +
                  </xsl:element>
 +
                   
 +
              </xsl:element>
 +
               
 +
            </xsl:template>
 +
           
 +
        </xsl:stylesheet>
 +
       
 +
      </Body>
 +
   
 +
  </Profile>
  
 +
</Resource>
  
 
</source>  
 
</source>  
 +
 +
In this example "title" and "creator" will have string values, so we just need to create a generic resource for string-string key-value pairs:
 +
 +
<source lang="xml">
 +
 +
<Resource version="0.4.x">
 +
 
 +
  <ID>dad02130-f371-11dd-8807-8f9f04747f1a</ID>
 +
 
 +
  <Type>GenericResource</Type>
 +
 
 +
  <Scopes>
 +
       
 +
      <Scope>/d4science.research-infrastructures.eu/Ecosystem</Scope>
 +
 
 +
  </Scopes>
 +
 
 +
  <Profile>
 +
       
 +
      <SecondaryType>ForwardIndexType</SecondaryType>
 +
 
 +
      <Name>IndexType_fwd_string_string</Name>
 +
 
 +
      <Description>Definition of the index type 'string_string' for the forward index</Description>
 +
 
 +
      <Body>
 +
           
 +
        <field-list>
 +
               
 +
            <field name="key">
 +
     
 +
              <type>string</type>
 +
     
 +
              <sort>ascending</sort>
 +
   
 +
            </field>
 +
   
 +
            <field name="value">
 +
     
 +
              <type>string</type>
 +
   
 +
            </field>
 +
   
 +
        </field-list>
 +
 
 +
      </Body>
 +
 
 +
  </Profile>
 +
 +
</Resource>
 +
 +
</source>
 +
 +
In case we had a field that would have a single date value in the format 'yyyy-MM-dd' for each document, we would also have to create a generic resource for a string-date key-value pair like the following one:
 +
 +
<source lang="xml">
 +
 +
<Resource version="0.4.x">
 +
 
 +
  <ID>fda25d20-7a8c-11de-8711-ef1b13f1e127</ID>
 +
 
 +
  <Type>GenericResource</Type>
 +
 
 +
  <Scopes>
 +
       
 +
      <Scope>/d4science.research-infrastructures.eu/Ecosystem</Scope>
 +
 
 +
  </Scopes>
 +
 
 +
  <Profile>
 +
       
 +
      <SecondaryType>ForwardIndexType</SecondaryType>
 +
 
 +
      <Name>IndexType_fwd_esDate_string</Name>
 +
 
 +
      <Description>Definition of the index type 'date_string' for the forward index (suitable for es schema indexing)</Description>
 +
 
 +
      <Body>
 +
           
 +
        <field-list>
 +
               
 +
            <field name="key">
 +
     
 +
              <type>date</type>
 +
     
 +
              <format>yyyy-MM-dd</format>
 +
     
 +
              <sort>ascending</sort>
 +
   
 +
            </field>
 +
   
 +
            <field name="value">
 +
     
 +
              <type>string</type>
 +
   
 +
            </field>
 +
   
 +
        </field-list>
 +
 
 +
      </Body>
 +
 
 +
  </Profile>
 +
 +
</Resource>
 +
 +
</source>
 +
 +
Finally we would have to append to the IRBootStrapper configuration generic resource the following job that would create the Full Text and Forward Index:
  
 
<source lang="xml">
 
<source lang="xml">
Line 600: Line 808:
 
</source>
 
</source>
  
For the jobtype:
+
where the jobtype is defined in the corresponding section of the IRBootstrapper configuration:
  
 
<source lang="xml">
 
<source lang="xml">

Revision as of 21:17, 30 September 2011

Indexing Procedure

The Indexing procedure refers to the creation of indices for the collections imported in a Virtual Organization. It consists of three steps:

  • Creation of the Rowset XSLT generic resources, that transform collection data into data that can be fed to an Index.
  • Creation of the Index type generic resources, that define the Index configuration.
  • Definition of an IRBootstrapper job that will perform the steps required to create the Indices.

In the first two steps we create generic resources for the Rowset XSLTs and Index Types through the Resource Management portlet. You can find detailed descriptions for the Rowset data (the output of the Rowset XSLT transformation) in the following sections:

You can find detailed descriptions for the Index Type definition here:

For the third step, a definition of an IRBootstrapper job is required. You can find the details for defining such a job in the IR Bootstrapper section. The two examples that follow will clarify the three steps.

Creating a Full Text and a Forward Index for a collection with two views

In this example we will create a Full Text and a Forward Index for a collection that has one view for the data in ES schema and one view for the data in DC schema. First of all, let's create a generic resource for the Full Text Rowset XSLT for the collection data in ES schema:

<Resource version="0.4.x">
  <!-- Generic resource carrying the Full Text Rowset XSLT for the ES-schema view of the collection. -->
  <ID>5d10db30-f372-11dd-8807-8f9f04747f1a</ID>
  <Type>GenericResource</Type>
  <Scopes>
    <Scope>/d4science.research-infrastructures.eu/Ecosystem</Scope>
  </Scopes>
  <Profile>
    <SecondaryType>MetadataBrokerXSLT</SecondaryType>
    <Name>BrokerXSLT_es_anylanguage_to_ftRowset_anylanguage</Name>
    <Description>XSLT to transform from es schema to full text index rowset schema</Description>
    <Body>
      <!-- The "dc" and "xsl" namespaces are declared once on the stylesheet root; every
           descendant inherits them, so the in-scope namespaces of each element are unchanged. -->
      <xsl:stylesheet xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
        <xsl:output indent="yes" method="xml" omit-xml-declaration="yes" />

        <!-- Root template: wrap the rows produced for every esObject in a single ROWSET. -->
        <xsl:template match="/">
          <ROWSET>
            <xsl:apply-templates select="//esObject" />
          </ROWSET>
        </xsl:template>

        <!-- One ROW per esObject; each FIELD copies the string value of the selected child. -->
        <xsl:template match="//esObject">
          <ROW>
            <!-- Single-valued fields. -->
            <FIELD name="title">
              <xsl:value-of select="title" />
            </FIELD>
            <FIELD name="description">
              <xsl:value-of select="description" />
            </FIELD>
            <FIELD name="purpose">
              <xsl:value-of select="purpose" />
            </FIELD>
            <!-- Repeating fields: one FIELD is emitted per matching node. -->
            <xsl:for-each select="provenance/creator">
              <FIELD name="creator">
                <xsl:value-of select="." />
              </FIELD>
            </xsl:for-each>
            <xsl:for-each select="descKeys/keyword">
              <FIELD name="keyword">
                <xsl:value-of select="." />
              </FIELD>
            </xsl:for-each>
            <!-- The "date" field is taken from timeFrame/end. -->
            <FIELD name="date">
              <xsl:value-of select="timeFrame/end" />
            </FIELD>
            <FIELD name="suppInfo">
              <xsl:value-of select="suppInfo" />
            </FIELD>
          </ROW>
        </xsl:template>
      </xsl:stylesheet>
    </Body>
  </Profile>
</Resource>

Note that this XSLT extracts a number of fields from the collection payload. In the same fashion we create the Full Text Rowset XSLT for the DC schema:

<Resource version="0.4.x">
  <!-- Generic resource carrying the Full Text Rowset XSLT for the DC-schema view of the collection. -->
  <ID>5b31baf0-f372-11dd-8807-8f9f04747f1a</ID>
  <Type>GenericResource</Type>
  <Scopes>
    <Scope>/d4science.research-infrastructures.eu/Ecosystem</Scope>
  </Scopes>
  <Profile>
    <SecondaryType>MetadataBrokerXSLT</SecondaryType>
    <Name>BrokerXSLT_dc_anylanguage_to_ftRowset_anylanguage</Name>
    <Description>XSLT to transform from DC schema to FTS rowset schema (suitable for feeding the full text index).</Description>
    <Body>
      <!-- The "xsl" namespace is declared once on the stylesheet root and inherited by all descendants. -->
      <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
        <xsl:output indent="yes" method="xml" omit-xml-declaration="yes" />

        <!-- A single ROW is produced per input document. Elements are matched by local name,
             so the namespace (or prefix) used by the payload does not matter. -->
        <xsl:template match="/">
          <ROWSET>
            <ROW>
              <FIELD name="description.abstract">
                <xsl:value-of select="//*[local-name()='description.abstract']" />
              </FIELD>
              <!-- "subject" may repeat: emit one whitespace-normalised FIELD per non-blank occurrence. -->
              <xsl:for-each select="//*[local-name()='subject']">
                <xsl:if test="normalize-space(.)">
                  <FIELD name="subject">
                    <xsl:value-of select="normalize-space(.)" />
                  </FIELD>
                </xsl:if>
              </xsl:for-each>
              <!-- Remaining fields: xsl:value-of yields the string value of the first matching
                   node in document order (XSLT 1.0), or an empty FIELD when nothing matches. -->
              <FIELD name="type">
                <xsl:value-of select="//*[local-name()='type']" />
              </FIELD>
              <FIELD name="identifier">
                <xsl:value-of select="//*[local-name()='identifier']" />
              </FIELD>
              <FIELD name="language">
                <xsl:value-of select="//*[local-name()='language']" />
              </FIELD>
              <FIELD name="publisher">
                <xsl:value-of select="//*[local-name()='publisher']" />
              </FIELD>
              <FIELD name="contributor">
                <xsl:value-of select="//*[local-name()='contributor']" />
              </FIELD>
              <FIELD name="source">
                <xsl:value-of select="//*[local-name()='source']" />
              </FIELD>
            </ROW>
          </ROWSET>
        </xsl:template>
      </xsl:stylesheet>
    </Body>
  </Profile>
</Resource>

Additionally we must create the corresponding Full Text Index Type for the fields extracted from the ES and DC payload:

<Resource version="0.4.x">
  <!-- Generic resource defining the Full Text Index Type shared by the ES and DC views.
       Every field below uses the same settings: indexed, stored, returned, tokenized,
       not sortable, boost 1.0. -->
  <ID>06752440-bdb7-11e0-8e2a-d27a0ad33f7c</ID>
  <Type>GenericResource</Type>
  <Scopes>
    <Scope>/d4science.research-infrastructures.eu/Ecosystem</Scope>
  </Scopes>
  <Profile>
    <SecondaryType>FullTextIndexType</SecondaryType>
    <Name>IndexType_ft_es_dc_2.0</Name>
    <Description>Definition of the fulltext index type for the 'es' + 'dc' schema</Description>
    <Body>
      <index-type name="default">
        <field-list sort-xnear-stop-word-threshold="2E8">

          <!-- Fields emitted by BrokerXSLT_es_anylanguage_to_ftRowset_anylanguage (ES view). -->
          <field name="title">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>
          <field name="description">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>
          <field name="purpose">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>
          <field name="creator">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>
          <field name="keyword">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>
          <field name="date">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>
          <field name="suppInfo">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>

          <!-- Fields emitted by BrokerXSLT_dc_anylanguage_to_ftRowset_anylanguage (DC view). -->
          <field name="description.abstract">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>
          <field name="subject">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>
          <field name="type">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>
          <field name="identifier">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>
          <field name="language">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>
          <field name="publisher">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>
          <field name="contributor">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>
          <field name="source">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>

          <!-- Default fields: not produced by either Rowset XSLT, but they must still be declared here. -->
          <field name="gDocCollectionID">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>
          <field name="gDocCollectionLang">
            <index>yes</index>
            <store>yes</store>
            <return>yes</return>
            <tokenize>yes</tokenize>
            <sort>no</sort>
            <boost>1.0</boost>
          </field>

        </field-list>
      </index-type>
    </Body>
  </Profile>
</Resource>

Note, in the Index Type above, that we also need to define the default "gDocCollectionID" and "gDocCollectionLang" fields, although they are not referenced in the XSLTs (they are automatically added by the DTS). Then we will create the generic resources for the Forward Index. We will present a simple case here and use only the ES payload to create the Forward Rowsets. The following Rowset XSLT extracts the "title" and "creator" fields from the ES payload:

<Resource version="0.4.x">
  <!-- Generic resource carrying the Forward Rowset XSLT that extracts "title" and "creator"
       from the ES-schema view of the collection. -->
  <ID>848f00c0-c7a1-11de-bb0e-e5f08ba8a01a</ID>
  <Type>GenericResource</Type>
  <Scopes>
    <Scope>/d4science.research-infrastructures.eu/Ecosystem</Scope>
  </Scopes>
  <Profile>
    <SecondaryType>MetadataBrokerXSLT</SecondaryType>
    <Name>BrokerXSLT_es_anylanguage_to_fwRowset_anylanguage_title_creator</Name>
    <Description>XSLT to transform from es schema to forward rowset schema including the 'title' and 'creator' fields (suitable for feeding the forward index).</Description>
    <Body>
      <!-- The "xsl" namespace is declared once on the stylesheet root and inherited by all descendants. -->
      <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
        <xsl:output indent="yes" method="xml" omit-xml-declaration="yes" />

        <!-- Key name / XPath pairs for the indexed keys. The variable is not referenced by the
             template below. NOTE(review): presumably it is read by the component that applies
             this stylesheet; confirm before removing. -->
        <xsl:variable name="keys">
          <key>
            <keyName>title</keyName>
            <keyXPath>//title</keyXPath>
          </key>
          <key>
            <keyName>creator</keyName>
            <keyXPath>//creator</keyXPath>
          </key>
        </xsl:variable>

        <!-- Produces ROWSET/INSERT/TUPLE/VALUE holding one FIELD per non-blank title or creator. -->
        <xsl:template match="/">
          <xsl:element name="ROWSET">
            <xsl:element name="INSERT">
              <xsl:element name="TUPLE">
                <xsl:element name="VALUE">
                  <!-- Blank occurrences are skipped; values are whitespace-normalised. -->
                  <xsl:for-each select="//title">
                    <xsl:if test="normalize-space(.)">
                      <FIELD name="title">
                        <xsl:value-of select="normalize-space(.)" />
                      </FIELD>
                    </xsl:if>
                  </xsl:for-each>
                  <xsl:for-each select="//creator">
                    <xsl:if test="normalize-space(.)">
                      <FIELD name="creator">
                        <xsl:value-of select="normalize-space(.)" />
                      </FIELD>
                    </xsl:if>
                  </xsl:for-each>
                </xsl:element>
              </xsl:element>
            </xsl:element>
          </xsl:element>
        </xsl:template>
      </xsl:stylesheet>
    </Body>
  </Profile>
</Resource>

In this example "title" and "creator" will have string values, so we just need to create a generic resource for string-string key-value pairs:

<Resource version="0.4.x">
  <!-- Generic resource defining the Forward Index Type for string keys mapped to string values. -->
  <ID>dad02130-f371-11dd-8807-8f9f04747f1a</ID>
  <Type>GenericResource</Type>
  <Scopes>
    <Scope>/d4science.research-infrastructures.eu/Ecosystem</Scope>
  </Scopes>
  <Profile>
    <SecondaryType>ForwardIndexType</SecondaryType>
    <Name>IndexType_fwd_string_string</Name>
    <Description>Definition of the index type 'string_string' for the forward index</Description>
    <Body>
      <field-list>
        <!-- Key: a string, sorted in ascending order. -->
        <field name="key">
          <type>string</type>
          <sort>ascending</sort>
        </field>
        <!-- Value: a string. -->
        <field name="value">
          <type>string</type>
        </field>
      </field-list>
    </Body>
  </Profile>
</Resource>

In case we had a field that would have a single date value in the format 'yyyy-MM-dd' for each document, we would also have to create a generic resource for a date-string key-value pair (date key, string value) like the following one:

<Resource version="0.4.x">
  <!-- Generic resource defining the Forward Index Type for date keys mapped to string values. -->
  <ID>fda25d20-7a8c-11de-8711-ef1b13f1e127</ID>
  <Type>GenericResource</Type>
  <Scopes>
    <Scope>/d4science.research-infrastructures.eu/Ecosystem</Scope>
  </Scopes>
  <Profile>
    <SecondaryType>ForwardIndexType</SecondaryType>
    <Name>IndexType_fwd_esDate_string</Name>
    <Description>Definition of the index type 'date_string' for the forward index (suitable for es schema indexing)</Description>
    <Body>
      <field-list>
        <!-- Key: a date in the 'yyyy-MM-dd' format, sorted in ascending order. -->
        <field name="key">
          <type>date</type>
          <format>yyyy-MM-dd</format>
          <sort>ascending</sort>
        </field>
        <!-- Value: a string. -->
        <field name="value">
          <type>string</type>
        </field>
      </field-list>
    </Body>
  </Profile>
</Resource>

Finally, we have to append the following job, which creates the Full Text and Forward Indices, to the IRBootstrapper configuration generic resource:

<job jobtype="IndexCollection" name="IndexEsDcCollection">
  <!-- Job of type "IndexCollection": supplies the concrete values for the Full Text task
       (Create_MC_ft_index) and the Forward task (Create_MC_fwd_index). -->
  <initialization>

    <!-- Name of the collection to index. -->
    <assign to="%IndexCollection.input.ColName" value="European Environment Agency: Topic Reports" />

    <!-- Full Text index: index type, wrapper XSLT, and the two views with their Rowset XSLTs.
         NOTE(review): Views and ViewsXSLTSIDs appear to be matched by position; confirm. -->
    <assign to="%Create_MC_ft_index.FullTextIndexGenerationTask.IndexTypeID" value="ft_es_dc_2.0" />
    <assign to="%Create_MC_ft_index.FullTextIndexGenerationTask.TransformationXSLTID" value="$BrokerXSLT_wrapperFT" />
    <assign to="%Create_MC_ft_index.FullTextIndexGenerationTask.Views" value="[ 5ec68209-41f1-4c1c-9359-ff055a38d981, 55404922-12ee-4808-9cc7-ac1ab38c4a2e ]" />
    <assign to="%Create_MC_ft_index.FullTextIndexGenerationTask.ViewsXSLTSIDs" value="[ $BrokerXSLT_es_anylanguage_to_ftRowset_anylanguage, $BrokerXSLT_dc_anylanguage_to_ftRowset_anylanguage ]" />
    <!-- Value comes from "%userInput", labelled by userInputLabel. -->
    <assign to="%Create_MC_ft_index.FullTextIndexGenerationTask.IdOfIndexManagerToAppend" userInputLabel="ID of FT index management resource to append" value="%userInput" />

    <!-- Forward index: wrapper XSLT, five indexed keys with five key types, and a single view.
         NOTE(review): IndexedKeyNames and IndexedKeyTypes appear to be matched by position; confirm. -->
    <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.TransformationXSLTID" value="$BrokerXSLT_wrapperFWD" />
    <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.IndexedKeyNames" value="[ ObjectID, gDocCollectionID, gDocCollectionLang, title, creator ]" />
    <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.IndexedKeyTypes" value="[ fwd_string_string, fwd_string_string, fwd_string_string, fwd_string_string, fwd_string_string ]" />
    <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.Views" value="[ 5ec68209-41f1-4c1c-9359-ff055a38d981 ]" />
    <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.ViewsXSLTSIDs" value="[ $BrokerXSLT_es_anylanguage_to_fwRowset_anylanguage_title_creator ]" />
    <!-- Value comes from "%userInput", labelled by userInputLabel. -->
    <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.IdOfIndexManagerToAppend" userInputLabel="ID of FWD index management resource to append" value="%userInput" />

  </initialization>
</job>

where the jobtype is defined in the corresponding section of the IRBootstrapper configuration:

<jobtype description="Creates the required FT-FWD indices for a collection." name="IndexCollection">
  <!-- The job type takes a GCUBECollection as input. -->
  <input type="GCUBECollection" />
  <jobDefinition>
    <sequential>

      <!-- Step 1: Full Text index. The job input is passed to the task, the task's output
           collection ID is bound to the input collection ID, and NumberOfLookups is set to 2. -->
      <sequential>
        <assign to="%Create_MC_ft_index.input" value="%IndexCollection.input" />
        <assign to="%Create_MC_ft_index.output.IndexedCollectionID" value="%Create_MC_ft_index.input.ColID" />
        <assign to="%Create_MC_ft_index.FullTextIndexGenerationTask.NumberOfLookups" value="2" />
        <task name="Create_MC_ft_index" tasktype="FullTextIndexGenerationTask" />
      </sequential>

      <!-- Step 2: Forward index, wired the same way as the Full Text step. -->
      <sequential>
        <assign to="%Create_MC_fwd_index.input" value="%IndexCollection.input" />
        <assign to="%Create_MC_fwd_index.output.IndexedCollectionID" value="%Create_MC_fwd_index.input.ColID" />
        <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.NumberOfLookups" value="2" />
        <task name="Create_MC_fwd_index" tasktype="ForwardIndexGenerationTask" />
      </sequential>

    </sequential>
  </jobDefinition>
</jobtype>