Difference between revisions of "Creating Indices at the VO Level"

From Gcube Wiki
Jump to: navigation, search
(Creating a Full Text and a Forward Index for a collection with two views)
Line 23: Line 23:
 
==Creating a Full Text and a Forward Index for a collection with two views==
 
==Creating a Full Text and a Forward Index for a collection with two views==
  
In this example we will create a Full Text and a Forward Index for collection that has one view for the data in ES schema and one view for the data in DC schema. First of all let's create a generic resource for the Rowset XSLT for the collection data in ES schema
+
In this example we will create a Full Text and a Forward Index for collection that has one view for the data in ES schema and one view for the data in DC schema. First of all let's create a generic resource for the Rowset XSLT for the collection data in ES schema:
  
 
<source lang="xml">
 
<source lang="xml">
  
<jobtype description="Creates the required FT-FWD indices for a collection." name="IndexCollection">
+
<Resource version="0.4.x">
 +
   
 +
  <ID>5d10db30-f372-11dd-8807-8f9f04747f1a</ID>
 +
   
 +
  <Type>GenericResource</Type>
 +
   
 +
  <Scopes>
 +
       
 +
      <Scope>/d4science.research-infrastructures.eu/Ecosystem</Scope>
 +
   
 +
  </Scopes>
 +
   
 +
  <Profile>
 +
       
 +
      <SecondaryType>MetadataBrokerXSLT</SecondaryType>
 +
       
 +
      <Name>BrokerXSLT_es_anylanguage_to_ftRowset_anylanguage</Name>
 +
       
 +
      <Description>XSLT to transform from es schema to full text index rowset schema</Description>
 +
       
 +
      <Body>
 +
           
 +
        <xsl:stylesheet xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
 +
               
 +
            <xsl:output xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" indent="yes" method="xml" omit-xml-declaration="yes" />
 +
               
 +
            <xsl:template xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" match="/">
 +
                   
 +
              <ROWSET xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 
                          
 
                          
                   <input type="GCUBECollection" />
+
                   <xsl:apply-templates xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" select="//esObject" />
 +
                   
 +
              </ROWSET>
 +
               
 +
            </xsl:template>
 +
               
 +
            <xsl:template xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" match="//esObject">
 +
                   
 +
              <ROW xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 
                          
 
                          
                   <jobDefinition>
+
                   <FIELD xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" name="title">
 
                              
 
                              
                     <sequential>
+
                     <xsl:value-of xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" select="title" />
 +
                       
 +
                  </FIELD>
 +
                       
 +
                  <FIELD xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" name="description">
 +
                           
 +
                    <xsl:value-of xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" select="description" />
 +
                       
 +
                  </FIELD>
 +
                       
 +
                  <FIELD xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" name="purpose">
 +
                           
 +
                    <xsl:value-of xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" select="purpose" />
 +
                       
 +
                  </FIELD>
 +
                       
 +
                  <xsl:for-each xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" select="provenance/creator">
 +
                           
 +
                    <FIELD xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" name="creator">
 
                                  
 
                                  
                         <sequential>
+
                         <xsl:value-of xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" select="." />
                                   
+
                           
                          <assign to="%Create_MC_ft_index.input" value="%IndexCollection.input" />
+
                    </FIELD>
                                   
+
                       
                          <assign to="%Create_MC_ft_index.output.IndexedCollectionID" value="%Create_MC_ft_index.input.ColID" />
+
                  </xsl:for-each>
                                   
+
                       
                          <assign to="%Create_MC_ft_index.FullTextIndexGenerationTask.NumberOfLookups" value="2" />
+
                  <xsl:for-each xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" select="descKeys/keyword">
                                   
+
                           
                          <task name="Create_MC_ft_index" tasktype="FullTextIndexGenerationTask" />
+
                    <FIELD xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" name="keyword">
 
                                  
 
                                  
                         </sequential>
+
                         <xsl:value-of xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" select="." />
                               
+
                        <sequential>
+
                                   
+
                          <assign to="%Create_MC_fwd_index.input" value="%IndexCollection.input" />
+
                                   
+
                          <assign to="%Create_MC_fwd_index.output.IndexedCollectionID" value="%Create_MC_fwd_index.input.ColID" />
+
                                   
+
                          <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.NumberOfLookups" value="2" />
+
                                   
+
                          <task name="Create_MC_fwd_index" tasktype="ForwardIndexGenerationTask" />
+
                               
+
                        </sequential>
+
 
                              
 
                              
                     </sequential>
+
                     </FIELD>
 
                          
 
                          
                   </jobDefinition>
+
                   </xsl:for-each>
 +
                       
 +
                  <FIELD xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" name="date">
 +
                           
 +
                    <xsl:value-of xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" select="timeFrame/end" />
 +
                       
 +
                  </FIELD>
 +
                       
 +
                  <FIELD xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" name="suppInfo">
 +
                           
 +
                    <xsl:value-of xmlns:dc="http://dublincore.org/documents/dcmi-terms/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" select="suppInfo" />
 +
                       
 +
                  </FIELD>
 
                      
 
                      
</jobtype>
+
              </ROW>
 +
               
 +
            </xsl:template>
 +
           
 +
        </xsl:stylesheet>
 +
       
 +
      </Body>
 +
   
 +
  </Profile>
  
 +
</Resource>
  
 
</source>
 
</source>
 
  
  
Line 103: Line 165:
 
                      
 
                      
 
</job>
 
</job>
 +
 +
 +
</source>
 +
 +
For the jobtype:
 +
 +
<source lang="xml">
 +
 +
<jobtype description="Creates the required FT-FWD indices for a collection." name="IndexCollection">
 +
                       
 +
                  <input type="GCUBECollection" />
 +
                       
 +
                  <jobDefinition>
 +
                           
 +
                    <sequential>
 +
                               
 +
                        <sequential>
 +
                                   
 +
                          <assign to="%Create_MC_ft_index.input" value="%IndexCollection.input" />
 +
                                   
 +
                          <assign to="%Create_MC_ft_index.output.IndexedCollectionID" value="%Create_MC_ft_index.input.ColID" />
 +
                                   
 +
                          <assign to="%Create_MC_ft_index.FullTextIndexGenerationTask.NumberOfLookups" value="2" />
 +
                                   
 +
                          <task name="Create_MC_ft_index" tasktype="FullTextIndexGenerationTask" />
 +
                               
 +
                        </sequential>
 +
                               
 +
                        <sequential>
 +
                                   
 +
                          <assign to="%Create_MC_fwd_index.input" value="%IndexCollection.input" />
 +
                                   
 +
                          <assign to="%Create_MC_fwd_index.output.IndexedCollectionID" value="%Create_MC_fwd_index.input.ColID" />
 +
                                   
 +
                          <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.NumberOfLookups" value="2" />
 +
                                   
 +
                          <task name="Create_MC_fwd_index" tasktype="ForwardIndexGenerationTask" />
 +
                               
 +
                        </sequential>
 +
                           
 +
                    </sequential>
 +
                       
 +
                  </jobDefinition>
 +
                   
 +
</jobtype>
  
  
 
</source>
 
</source>

Revision as of 20:42, 30 September 2011

Indexing Procedure

The Indexing procedure refers to the creation of indices for the collections imported in a Virtual Organization. It consists of three steps:

  • Creation of the Rowset XSLT generic resources, which transform collection data into data that can be fed to an Index.
  • Creation of the Index type generic resources, which define the Index configuration.
  • Definition of an IRBootstrapper job that will perform the steps required to create the Indices.

In the first two steps we create generic resources for the Rowset XSLTs and Index Types through the Resource Management portlet. You can find detailed descriptions for the Rowset data (the output of the Rowset XSLT transformation) in the following sections:

You can find detailed descriptions for the Index Type definition here:

For the third step, a definition of an IRBootstrapper job is required. You can find the details for defining such a job in the IR Bootstrapper section. The two examples that follow will clarify the three steps.

Creating a Full Text and a Forward Index for a collection with two views

In this example we will create a Full Text and a Forward Index for a collection that has one view for the data in ES schema and one view for the data in DC schema. First of all, let's create a generic resource for the Rowset XSLT for the collection data in ES schema:

<Resource version="0.4.x">
  <!-- Generic resource whose Body holds the XSLT that turns ES-schema records
       into full text index rowset data (see Description below). -->
  <ID>5d10db30-f372-11dd-8807-8f9f04747f1a</ID>
  <Type>GenericResource</Type>
  <Scopes>
    <Scope>/d4science.research-infrastructures.eu/Ecosystem</Scope>
  </Scopes>
  <Profile>
    <SecondaryType>MetadataBrokerXSLT</SecondaryType>
    <Name>BrokerXSLT_es_anylanguage_to_ftRowset_anylanguage</Name>
    <Description>XSLT to transform from es schema to full text index rowset schema</Description>
    <Body>
      <xsl:stylesheet xmlns:dc="http://dublincore.org/documents/dcmi-terms/"
                      xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                      version="1.0">
        <!-- Both namespace prefixes are declared once here; every descendant
             inherits them, so they are not repeated on each element. -->
        <xsl:output indent="yes" method="xml" omit-xml-declaration="yes"/>

        <!-- Document root: wrap the rows produced for every esObject in one ROWSET. -->
        <xsl:template match="/">
          <ROWSET>
            <xsl:apply-templates select="//esObject"/>
          </ROWSET>
        </xsl:template>

        <!-- One ROW per esObject; each FIELD is named by its name attribute. -->
        <xsl:template match="//esObject">
          <ROW>
            <FIELD name="title"><xsl:value-of select="title"/></FIELD>
            <FIELD name="description"><xsl:value-of select="description"/></FIELD>
            <FIELD name="purpose"><xsl:value-of select="purpose"/></FIELD>
            <!-- Repeating source elements: one FIELD per creator / keyword. -->
            <xsl:for-each select="provenance/creator">
              <FIELD name="creator"><xsl:value-of select="."/></FIELD>
            </xsl:for-each>
            <xsl:for-each select="descKeys/keyword">
              <FIELD name="keyword"><xsl:value-of select="."/></FIELD>
            </xsl:for-each>
            <!-- The date field is taken from timeFrame/end. -->
            <FIELD name="date"><xsl:value-of select="timeFrame/end"/></FIELD>
            <FIELD name="suppInfo"><xsl:value-of select="suppInfo"/></FIELD>
          </ROW>
        </xsl:template>
      </xsl:stylesheet>
    </Body>
  </Profile>
</Resource>


<job jobtype="IndexCollection" name="IndexEsDcCollection">
  <!-- Initial assignments for one job of type IndexCollection.
       NOTE(review): values starting with "$" look like references to generic
       resources by name, and "%userInput" looks like a value requested from the
       user under the given userInputLabel; confirm against the IR Bootstrapper
       documentation. -->
  <initialization>
    <!-- Collection handed to the job type as input. -->
    <assign to="%IndexCollection.input.ColName" value="European Environment Agency: Topic Reports"/>

    <!-- Full text index task (Create_MC_ft_index): two views, one rowset XSLT per view. -->
    <assign to="%Create_MC_ft_index.FullTextIndexGenerationTask.IndexTypeID" value="ft_es_dc_2.0"/>
    <assign to="%Create_MC_ft_index.FullTextIndexGenerationTask.TransformationXSLTID" value="$BrokerXSLT_wrapperFT"/>
    <assign to="%Create_MC_ft_index.FullTextIndexGenerationTask.Views" value="[ 5ec68209-41f1-4c1c-9359-ff055a38d981, 55404922-12ee-4808-9cc7-ac1ab38c4a2e ]"/>
    <assign to="%Create_MC_ft_index.FullTextIndexGenerationTask.ViewsXSLTSIDs" value="[ $BrokerXSLT_es_anylanguage_to_ftRowset_anylanguage, $BrokerXSLT_dc_anylanguage_to_ftRowset_anylanguage ]"/>
    <assign to="%Create_MC_ft_index.FullTextIndexGenerationTask.IdOfIndexManagerToAppend"
            userInputLabel="ID of FT index management resource to append"
            value="%userInput"/>

    <!-- Forward index task (Create_MC_fwd_index): a single view; IndexedKeyNames
         and IndexedKeyTypes are parallel lists of five entries each. -->
    <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.TransformationXSLTID" value="$BrokerXSLT_wrapperFWD"/>
    <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.IndexedKeyNames" value="[ ObjectID, gDocCollectionID, gDocCollectionLang, title, creator ]"/>
    <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.IndexedKeyTypes" value="[ fwd_string_string, fwd_string_string, fwd_string_string, fwd_string_string, fwd_string_string ]"/>
    <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.Views" value="[ 5ec68209-41f1-4c1c-9359-ff055a38d981 ]"/>
    <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.ViewsXSLTSIDs" value="[ $BrokerXSLT_es_anylanguage_to_fwRowset_anylanguage_title_creator ]"/>
    <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.IdOfIndexManagerToAppend"
            userInputLabel="ID of FWD index management resource to append"
            value="%userInput"/>
  </initialization>
</job>

For the jobtype:

<jobtype description="Creates the required FT-FWD indices for a collection." name="IndexCollection">
  <!-- The job type takes a single input of type GCUBECollection. -->
  <input type="GCUBECollection"/>
  <jobDefinition>
    <!-- Outer sequence holding two inner sequences: first the full text index
         task, then the forward index task. -->
    <sequential>
      <!-- Full text index: pass the job input to the task, set the task's
           output IndexedCollectionID from the input ColID, set NumberOfLookups
           to 2, then declare the task. -->
      <sequential>
        <assign to="%Create_MC_ft_index.input" value="%IndexCollection.input"/>
        <assign to="%Create_MC_ft_index.output.IndexedCollectionID" value="%Create_MC_ft_index.input.ColID"/>
        <assign to="%Create_MC_ft_index.FullTextIndexGenerationTask.NumberOfLookups" value="2"/>
        <task name="Create_MC_ft_index" tasktype="FullTextIndexGenerationTask"/>
      </sequential>
      <!-- Forward index: the same three assignments, then the forward index task. -->
      <sequential>
        <assign to="%Create_MC_fwd_index.input" value="%IndexCollection.input"/>
        <assign to="%Create_MC_fwd_index.output.IndexedCollectionID" value="%Create_MC_fwd_index.input.ColID"/>
        <assign to="%Create_MC_fwd_index.ForwardIndexGenerationTask.NumberOfLookups" value="2"/>
        <task name="Create_MC_fwd_index" tasktype="ForwardIndexGenerationTask"/>
      </sequential>
    </sequential>
  </jobDefinition>
</jobtype>