Skip to content

Section 9 - Create Consolidated validator using IntelliJ

Go back to Getting started guide

In this section we will:

Processing Pipeline

Validate historical data movements using the HISTORICAL_SHIFT Consolidated Data Validator

PORTF_VALUE_HISTORICAL_VALIDATOR

  1. If you completed all previous sections, the current state of the data is illustrated by the table below:

    BAC FIRST_NAME LAST_NAME AGE TFN YEARLY_INCOME PORTFOLIO_VALUE
    BAC111111 Tom JONES 22 111 111 111 89000 97 800
    BAC222222 Bob SMITH 35 222 222 222 99000 82 000
    BAC333333 ROGERS 54 333 333 333 125000 1 000 000
  2. If you were not able to complete the previous section, you can copy the configuration below and paste it into SCHEMA_CUSTOMER.xml to continue with this section.

      1
      2
      3
      4
      5
      6
      7
      8
      9
     10
     11
     12
     13
     14
     15
     16
     17
     18
     19
     20
     21
     22
     23
     24
     25
     26
     27
     28
     29
     30
     31
     32
     33
     34
     35
     36
     37
     38
     39
     40
     41
     42
     43
     44
     45
     46
     47
     48
     49
     50
     51
     52
     53
     54
     55
     56
     57
     58
     59
     60
     61
     62
     63
     64
     65
     66
     67
     68
     69
     70
     71
     72
     73
     74
     75
     76
     77
     78
     79
     80
     81
     82
     83
     84
     85
     86
     87
     88
     89
     90
     91
     92
     93
     94
     95
     96
     97
     98
     99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
        <?xml version="1.0" encoding="UTF-8"?>
    
        <apiroConf version="1" xmlns="http://apiro.com/apiro/v1/root">
            <groups/>
            <loadOrder>15</loadOrder>
            <schemas>
                <schema defBacked="false" historical="false" name="CUSTOMER">
                    <groupTags>
                        <groupTag>EXAMPLES</groupTag>
                    </groupTags>
                    <metaData/>
                    <identityKeys>
                        <identityKey>BAC</identityKey>
                    </identityKeys>
    
                    <!-- Data Point descriptions -->
                    <dataPoints>
                        <!-- BAC: STRING data point declared non-nullable; it is listed as the schema's identity key
                             and carries a piiClassification metadata item with the value "High Risk". -->
                        <dataPoint name="BAC"
                                   dataType="STRING"
                                   canEditValid="true"
                                   canEditViolated="true"
                                   displayName="BAC">
                            <nullable>false</nullable>
    
                            <metaData>
                                <item name="piiClassification">
                                    <simpleValues>
                                        <simpleValue>High Risk</simpleValue>
                                    </simpleValues>
                                </item>
                            </metaData>
    
                            <!-- BAC data point processors: every validator/processor list below is empty,
                                 and the consolidation algorithm placeholder is commented out. -->
                            <rawDPValidators/>
                            <rawDPProcessors/>
                            <!--consolidationAlgorithm></consolidationAlgorithm -->
                            <consDPValidators/>
                            <consDPProcessors/>
                        </dataPoint>
    
                        <!-- FIRST_NAME: raw values go through an IN_SET validator configured with the options
                             "Tom" and "Bob" (ignoreCase enabled); the consolidated value goes through a
                             NOT_NULL validator. -->
                        <dataPoint name="FIRST_NAME"
                                   dataType="STRING"
                                   displayName="First Name"
                                   canEditValid="true"
                                   canEditViolated="true">
                            <rawDPValidators>
                                <!-- The validator name must not carry trailing whitespace: XML keeps it as part
                                     of the attribute value. -->
                                <rawDPValidator name="IN_BAC_SET_CHECK" entity="IN_SET">
                                    <config>
                                        <![CDATA[
                                        {
                                            ignoreCase : true,
                                            options : [ "Tom", "Bob"]
                                        }
                                    ]]>
                                    </config>
                                </rawDPValidator>
                            </rawDPValidators>
    
                            <consDPValidators>
                                <consDPValidator name="INVALID_IF_CONSOLIDATED_NULL" entity="NOT_NULL"/>
                            </consDPValidators>
                        </dataPoint>
    
                        <!-- LAST_NAME: a GEN_EXPRESS raw processor runs a Groovy expression that reassigns
                             CTX['.'] to its upper-cased form (CTX['.'] is presumably the current raw value; confirm).
                             NOTE(review): toUpperCase() is called without a null check; confirm the processor
                             is never invoked with a null value. -->
                        <dataPoint name="LAST_NAME" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="LAST NAME">
                            <rawDPProcessors>
                                <rawDPProcessor name="CAPITALISE_LAST_NAME_RAW_PROC" entity="GEN_EXPRESS">
                                    <config>
                                        <![CDATA[
                                                #GRV{
                                                    CTX['.'] = CTX['.'].toUpperCase()
                                                }
                                            ]]>
                                    </config>
    
                                </rawDPProcessor>
                            </rawDPProcessors>
                        </dataPoint>
    
                        <!-- ADDRESS and PHONE_NUMBER: STRING data points declared without validators or processors. -->
                        <dataPoint name="ADDRESS" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="ADDRESS"/>
                        <dataPoint name="PHONE_NUMBER" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="PHONE NUMBER"/>
                        <!-- AGE: INTEGER data point whose raw values are checked by a NOT_NULL validator and a
                             POSITIVE validator. Remarks are written as XML comments so that no stray text ends
                             up inside the validator elements. -->
                        <dataPoint name="AGE" dataType="INTEGER" canEditValid="true" canEditViolated="true" displayName="Age">
                            <rawDPValidators>
                                <!-- The name can be anything and it will appear in data audit/lineage -->
                                <rawDPValidator name="INVALID_IF_NULL" entity="NOT_NULL"/>
                                <rawDPValidator name="INVALID_IF_NEGATIVE" entity="POSITIVE">
                                    <!-- This is the default value if one is not specified -->
                                    <lateBound>false</lateBound>
                                </rawDPValidator>
                            </rawDPValidators>
                        </dataPoint>
                        <!-- YEARLY_INCOME (DECIMAL) and TFN (STRING): declared without validators or processors. -->
                        <dataPoint name="YEARLY_INCOME" canEditValid="false" canEditViolated="true" dataType="DECIMAL" displayName="YEARLY INCOME"/>
                        <dataPoint name="TFN" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="TFN"/>
    
                        <!-- TFN_MASKED: a HASH_MASK consolidated processor whose config passes CTX['TFN'] as
                             inputValue together with a fixed maskingSalt.
                             NOTE(review): the salt is stored in plain text in this file; presumably acceptable
                             for a tutorial only. Confirm before reusing this configuration elsewhere. -->
                        <dataPoint name="TFN_MASKED" displayName="Tax File Number Masked" dataType="STRING">
                            <consDPProcessors>
                                <consDPProcessor name="TFN_HASH_MASKING" entity="HASH_MASK">
                                    <config>
                                        <![CDATA[
                                {
                                    "inputValue":"#GRV{ CTX['TFN'] }",
                                    "maskingSalt":"aqQwSxXcfgdejhbJhdygjyfdghjHGYYIdh!66gydshasGY!"
                                }
                            ]]>
                                    </config>
                                </consDPProcessor>
                            </consDPProcessors>
                        </dataPoint>
    
                        <!-- PORTFOLIO_VALUE: DECIMAL data point consolidated by a Groovy expression that reads
                             the CUSTOMERS_A_XLSX and CUSTOMERS_B_XLSX entries of "items":
                               * neither entry present: returns 0
                               * exactly one entry present: returns that entry
                               * both present: returns 0.8 * first + 0.2 * second
                             Missing (null) entries are filtered with findAll. A single list.remove(null) call
                             removes at most one null, which left the "return 0" branch unreachable and made
                             the script return null when both sources were missing. -->
                        <dataPoint name="PORTFOLIO_VALUE"
                                   displayName="Investment Portfolio Value"
                                   dataType="DECIMAL"
                                   canEditValid="false"
                                   canEditViolated="true">
    
                            <consolidationAlgorithm name="PORTF_VALUE_WEIGHTED_MEAN_01" entity="GEN_EXPRESS">
                                <config>
                                    <![CDATA[
                                                #GRV{
                                                    def list = [items.get("CUSTOMERS_A_XLSX"), items.get("CUSTOMERS_B_XLSX")].findAll { it != null }
    
                                                    if(list.size()==0)
                                                        return 0;
                                                    else if (list.size() == 1)
                                                        return list[0]
                                                    else {
                                                        return (list[0].asDBL()*0.8 + list[1].asDBL()*0.2)
                                                    }
                                                }
                                                ]]>
                                </config>
                            </consolidationAlgorithm>
                        </dataPoint>
    
                        <!-- Remaining data points: declared with a data type and edit flags only, no validators or processors. -->
                        <dataPoint name="COMPANY_NAME" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="COMPANY NAME"/>
                        <dataPoint name="COMPANY_ADDRESS" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="COMPANY ADDRESS"/>
                        <dataPoint name="PROFILE_IMAGE" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="PROFILE_IMAGE"/>
                        <dataPoint name="COMPANY_WEBSITE" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="COMPANY WEBSITE"/>
                        <dataPoint name="XML_ROOT_DOC"  canEditValid="false" canEditViolated="true"  displayName="XML Root Doc" dataType="XML"/>
                        <dataPoint name="JSON_ROOT_DOC"  canEditValid="false" canEditViolated="true"  displayName="JSON Root Doc" dataType="JSON"/>
                    </dataPoints>
                    <!-- Schema-level processors: tagged with the DEFAULT group; every validator/processor list is empty. -->
                    <schemaAppliedProcessors>
                        <groupTags>
                            <groupTag>DEFAULT</groupTag>
                        </groupTags>
                        <metaData/>
                        <rawDPValidators/>
                        <rawDPProcessors/>
                        <consDPValidators/>
                        <consDPProcessors/>
                        <dataBlockProcessors/>
                    </schemaAppliedProcessors>
                    <alerts/>
                </schema>
            </schemas>
        </apiroConf>
    

  3. In this example we will use a Historical Shift Consolidated Data Point Validator to check whether the PORTFOLIO_VALUE value has shifted by more than 5.2% across the last 5 values we have sourced.

  4. Copy the configuration below and place it inside the <dataPoint name="PORTFOLIO_VALUE"> element, after the closing </consolidationAlgorithm> tag.
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
        <!-- HISTORICAL_SHIFT consolidated validator for PORTFOLIO_VALUE. Per this guide it checks whether
             the value shifted by more than 5.2% ("percent") across the last 5 sourced values ("priorValues").
             "comparisonMore" presumably selects the "more than" comparison; confirm against the validator reference. -->
        <consDPValidators>
            <consDPValidator name="PORTF_VALUE_HISTORICAL_VALIDATOR" entity="HISTORICAL_SHIFT">
                <config>
                    <![CDATA[
                        {
                            "priorValues" : 5,
                            "percent" : 5.2,
                            "comparisonMore" : true
                        }
                    ]]>
                </config>
            </consDPValidator>
        </consDPValidators>

5. The complete <dataPoint name="PORTFOLIO_VALUE"> element will look like this:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
    <!-- PORTFOLIO_VALUE: DECIMAL data point.
         Consolidation: a Groovy expression reads the CUSTOMERS_A_XLSX and CUSTOMERS_B_XLSX entries of "items":
           * neither entry present: returns 0
           * exactly one entry present: returns that entry
           * both present: returns 0.8 * first + 0.2 * second
         Missing (null) entries are filtered with findAll. A single list.remove(null) call removes at most
         one null, which left the "return 0" branch unreachable.
         Validation: a HISTORICAL_SHIFT consolidated validator configured with priorValues 5 and percent 5.2. -->
    <dataPoint name="PORTFOLIO_VALUE"
               displayName="Investment Portfolio Value"
               dataType="DECIMAL"
               canEditValid="false"
               canEditViolated="true">

        <consolidationAlgorithm name="PORTF_VALUE_WEIGHTED_MEAN_01" entity="GEN_EXPRESS">
            <config>
                <![CDATA[
                    #GRV{
                        def list = [items.get("CUSTOMERS_A_XLSX"), items.get("CUSTOMERS_B_XLSX")].findAll { it != null }

                        if(list.size()==0)
                            return 0;
                        else if (list.size() == 1)
                            return list[0]
                        else {
                            return (list[0].asDBL()*0.8 + list[1].asDBL()*0.2)
                        }
                    }
                ]]>
            </config>
        </consolidationAlgorithm>

        <consDPValidators>
            <consDPValidator name="PORTF_VALUE_HISTORICAL_VALIDATOR" entity="HISTORICAL_SHIFT">
                <config>
                    <![CDATA[
                        {
                            "priorValues" : 5,
                            "percent" : 5.2,
                            "comparisonMore" : true
                        }
                    ]]>
                </config>
            </consDPValidator>
        </consDPValidators>
    </dataPoint>
6. Note: We do not have any historical values in our current setup so no violations will be raised.

Configuration files

Completed configuration files
  • This is the completed CUSTOMER schema configuration file that checks for historical value shifts of the data point PORTFOLIO_VALUE.
      1
      2
      3
      4
      5
      6
      7
      8
      9
     10
     11
     12
     13
     14
     15
     16
     17
     18
     19
     20
     21
     22
     23
     24
     25
     26
     27
     28
     29
     30
     31
     32
     33
     34
     35
     36
     37
     38
     39
     40
     41
     42
     43
     44
     45
     46
     47
     48
     49
     50
     51
     52
     53
     54
     55
     56
     57
     58
     59
     60
     61
     62
     63
     64
     65
     66
     67
     68
     69
     70
     71
     72
     73
     74
     75
     76
     77
     78
     79
     80
     81
     82
     83
     84
     85
     86
     87
     88
     89
     90
     91
     92
     93
     94
     95
     96
     97
     98
     99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
    158
    159
    160
    161
    162
    163
    164
    165
    166
    167
    168
    169
    170
    171
        <?xml version="1.0" encoding="UTF-8"?>
    
        <apiroConf version="1" xmlns="http://apiro.com/apiro/v1/root">
            <groups/>
            <loadOrder>15</loadOrder>
            <schemas>
                <schema defBacked="false" historical="false" name="CUSTOMER">
                    <groupTags>
                        <groupTag>EXAMPLES</groupTag>
                    </groupTags>
                    <metaData/>
                    <identityKeys>
                        <identityKey>BAC</identityKey>
                    </identityKeys>
    
                    <!-- Data Point descriptions -->
                    <dataPoints>
                        <!-- BAC: STRING data point declared non-nullable; it is listed as the schema's identity key
                             and carries a piiClassification metadata item with the value "High Risk". -->
                        <dataPoint name="BAC"
                                   dataType="STRING"
                                   canEditValid="true"
                                   canEditViolated="true"
                                   displayName="BAC">
                            <nullable>false</nullable>
    
                            <metaData>
                                <item name="piiClassification">
                                    <simpleValues>
                                        <simpleValue>High Risk</simpleValue>
                                    </simpleValues>
                                </item>
                            </metaData>
    
                            <!-- BAC data point processors: every validator/processor list below is empty,
                                 and the consolidation algorithm placeholder is commented out. -->
                            <rawDPValidators/>
                            <rawDPProcessors/>
                            <!--consolidationAlgorithm></consolidationAlgorithm -->
                            <consDPValidators/>
                            <consDPProcessors/>
                        </dataPoint>
    
                        <!-- FIRST_NAME: raw values go through an IN_SET validator configured with the options
                             "Tom" and "Bob" (ignoreCase enabled); the consolidated value goes through a
                             NOT_NULL validator. -->
                        <dataPoint name="FIRST_NAME"
                                   dataType="STRING"
                                   displayName="First Name"
                                   canEditValid="true"
                                   canEditViolated="true">
                            <rawDPValidators>
                                <!-- The validator name must not carry trailing whitespace: XML keeps it as part
                                     of the attribute value. -->
                                <rawDPValidator name="IN_BAC_SET_CHECK" entity="IN_SET">
                                    <config>
                                        <![CDATA[
                                        {
                                            ignoreCase : true,
                                            options : [ "Tom", "Bob"]
                                        }
                                    ]]>
                                    </config>
                                </rawDPValidator>
                            </rawDPValidators>
    
                            <consDPValidators>
                                <consDPValidator name="INVALID_IF_CONSOLIDATED_NULL" entity="NOT_NULL"/>
                            </consDPValidators>
                        </dataPoint>
    
                        <!-- LAST_NAME: a GEN_EXPRESS raw processor runs a Groovy expression that reassigns
                             CTX['.'] to its upper-cased form (CTX['.'] is presumably the current raw value; confirm).
                             NOTE(review): toUpperCase() is called without a null check; confirm the processor
                             is never invoked with a null value. -->
                        <dataPoint name="LAST_NAME" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="LAST NAME">
                            <rawDPProcessors>
                                <rawDPProcessor name="CAPITALISE_LAST_NAME_RAW_PROC" entity="GEN_EXPRESS">
                                    <config>
                                        <![CDATA[
                                                #GRV{
                                                    CTX['.'] = CTX['.'].toUpperCase()
                                                }
                                            ]]>
                                    </config>
    
                                </rawDPProcessor>
                            </rawDPProcessors>
                        </dataPoint>
    
                        <!-- ADDRESS and PHONE_NUMBER: STRING data points declared without validators or processors. -->
                        <dataPoint name="ADDRESS" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="ADDRESS"/>
                        <dataPoint name="PHONE_NUMBER" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="PHONE NUMBER"/>
                        <!-- AGE: INTEGER data point whose raw values are checked by a NOT_NULL validator and a
                             POSITIVE validator. Remarks are written as XML comments so that no stray text ends
                             up inside the validator elements. -->
                        <dataPoint name="AGE" dataType="INTEGER" canEditValid="true" canEditViolated="true" displayName="Age">
                            <rawDPValidators>
                                <!-- The name can be anything and it will appear in data audit/lineage -->
                                <rawDPValidator name="INVALID_IF_NULL" entity="NOT_NULL"/>
                                <rawDPValidator name="INVALID_IF_NEGATIVE" entity="POSITIVE">
                                    <!-- This is the default value if one is not specified -->
                                    <lateBound>false</lateBound>
                                </rawDPValidator>
                            </rawDPValidators>
                        </dataPoint>
                        <!-- YEARLY_INCOME (DECIMAL) and TFN (STRING): declared without validators or processors. -->
                        <dataPoint name="YEARLY_INCOME" canEditValid="false" canEditViolated="true" dataType="DECIMAL" displayName="YEARLY INCOME"/>
                        <dataPoint name="TFN" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="TFN"/>
    
                        <!-- TFN_MASKED: a HASH_MASK consolidated processor whose config passes CTX['TFN'] as
                             inputValue together with a fixed maskingSalt.
                             NOTE(review): the salt is stored in plain text in this file; presumably acceptable
                             for a tutorial only. Confirm before reusing this configuration elsewhere. -->
                        <dataPoint name="TFN_MASKED" displayName="Tax File Number Masked" dataType="STRING">
                            <consDPProcessors>
                                <consDPProcessor name="TFN_HASH_MASKING" entity="HASH_MASK">
                                    <config>
                                        <![CDATA[
                                {
                                    "inputValue":"#GRV{ CTX['TFN'] }",
                                    "maskingSalt":"aqQwSxXcfgdejhbJhdygjyfdghjHGYYIdh!66gydshasGY!"
                                }
                            ]]>
                                    </config>
                                </consDPProcessor>
                            </consDPProcessors>
                        </dataPoint>
    
                        <!-- PORTFOLIO_VALUE: DECIMAL data point.
                             Consolidation: a Groovy expression reads the CUSTOMERS_A_XLSX and CUSTOMERS_B_XLSX
                             entries of "items":
                               * neither entry present: returns 0
                               * exactly one entry present: returns that entry
                               * both present: returns 0.8 * first + 0.2 * second
                             Missing (null) entries are filtered with findAll. A single list.remove(null) call
                             removes at most one null, which left the "return 0" branch unreachable.
                             Validation: a HISTORICAL_SHIFT consolidated validator configured with
                             priorValues 5 and percent 5.2. -->
                        <dataPoint name="PORTFOLIO_VALUE"
                                   displayName="Investment Portfolio Value"
                                   dataType="DECIMAL"
                                   canEditValid="false"
                                   canEditViolated="true">
    
                            <consolidationAlgorithm name="PORTF_VALUE_WEIGHTED_MEAN_01" entity="GEN_EXPRESS">
                                <config>
                                    <![CDATA[
                                            #GRV{
                                                def list = [items.get("CUSTOMERS_A_XLSX"), items.get("CUSTOMERS_B_XLSX")].findAll { it != null }
    
                                                if(list.size()==0)
                                                    return 0;
                                                else if (list.size() == 1)
                                                    return list[0]
                                                else {
                                                    return (list[0].asDBL()*0.8 + list[1].asDBL()*0.2)
                                                }
                                            }
                                            ]]>
                                </config>
                            </consolidationAlgorithm>
    
                            <consDPValidators>
                                <consDPValidator name="PORTF_VALUE_HISTORICAL_VALIDATOR" entity="HISTORICAL_SHIFT">
                                    <config>
                                        <![CDATA[
                                        {
                                            "priorValues" : 5,
                                            "percent" : 5.2,
                                            "comparisonMore" : true
                                        }
                                    ]]>
                                    </config>
                                </consDPValidator>
                            </consDPValidators>
                        </dataPoint>
    
                        <!-- Remaining data points: declared with a data type and edit flags only, no validators or processors. -->
                        <dataPoint name="COMPANY_NAME" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="COMPANY NAME"/>
                        <dataPoint name="COMPANY_ADDRESS" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="COMPANY ADDRESS"/>
                        <dataPoint name="PROFILE_IMAGE" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="PROFILE_IMAGE"/>
                        <dataPoint name="COMPANY_WEBSITE" canEditValid="false" canEditViolated="true" dataType="STRING" displayName="COMPANY WEBSITE"/>
                        <dataPoint name="XML_ROOT_DOC"  canEditValid="false" canEditViolated="true"  displayName="XML Root Doc" dataType="XML"/>
                        <dataPoint name="JSON_ROOT_DOC"  canEditValid="false" canEditViolated="true"  displayName="JSON Root Doc" dataType="JSON"/>
                    </dataPoints>
                    <!-- Schema-level processors: tagged with the DEFAULT group; every validator/processor list is empty. -->
                    <schemaAppliedProcessors>
                        <groupTags>
                            <groupTag>DEFAULT</groupTag>
                        </groupTags>
                        <metaData/>
                        <rawDPValidators/>
                        <rawDPProcessors/>
                        <consDPValidators/>
                        <consDPProcessors/>
                        <dataBlockProcessors/>
                    </schemaAppliedProcessors>
                    <alerts/>
                </schema>
            </schemas>
        </apiroConf>
    
Deploy config files

Follow the steps in Config Deployment to deploy and start using your configuration files.