Skip to content

What are Data Transformers?

Data Transformers convert irregular data into a sourceable format that can be processed by the Apiro pipeline. The diagram below demonstrates how they work.

Data Transformer config

Data transformers are defined within data feeds. Below is an example of a data feed containing a PDF_TEXT_EXTRACT transformer:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
<!-- Example Apiro configuration: a single data feed whose source is a PDF file in a Git repository, passed through a PDF_TEXT_EXTRACT data transformer. -->
<apiroConf version="1" xmlns="http://apiro.com/apiro/v1/root">
    <loadOrder>20</loadOrder>
    <envProperties>
    </envProperties>
    <dataFeeds>
        <!-- Feed named PDF_EXTRACT, based on the EXPR_JSON_FEED2 feed definition. -->
        <dataFeed definition="EXPR_JSON_FEED2" name="PDF_EXTRACT">
            <execPriority>10</execPriority>
            <enabled>true</enabled>
            <!-- NOTE(review): push is off and pull is on; presumably the engine fetches the source itself. Confirm against the feed docs. -->
            <push>false</push>
            <pull>true</pull>
            <schema>PDF_EXTRACT</schema>
            <!--
              The feed configuration is a JSON document carried inside a CDATA section.
              Do not add XML comments inside the CDATA: they would become part of the JSON text.

              dataSource: a GIT entity pointing at /rudtest/energybills/petros.pdf on branch
                "rudtest" of the configured gitURL. The password is a ${SYS:...} secret
                reference, not a literal value.
              dataSource.config.transformers: one transformer named PDF_TO_TEXT of entity
                PDF_TEXT_EXTRACT, which reads the PDF and converts it into a JSON structure.
              explicitMappings: each entry pairs a "dictionary" name with a
                #{PAYLOAD.resolve('...')} expression. NOTE(review): the paths look like
                JSONPath, with '$' presumably meaning the whole payload. Confirm.
            -->
            <config><![CDATA[
{
  "dataSource": {
    "entity": "GIT",
    "config": {
      "password": "${SYS:TESTFEED_GIT_PASSWORD}",
      "gitURL": "https://github.com/redapiro/apiro_engine_test_feeds.git",
      "branch": "rudtest",
      "pathPrefix": "/rudtest/energybills/petros.pdf",
      "username": "apirobot",
      "transformers": [
        {
          "name": "PDF_TO_TEXT",
          "entity": "PDF_TEXT_EXTRACT",
          "config": {
            "positionalSort": false
          }
        }
      ]
    }
  },
  "explicitMappings": [
      {
      "dictionary": "full_json",
      "value": "#{PAYLOAD.resolve('$')}"
    },
    {
      "dictionary": "manual_review",
      "value": "#{PAYLOAD.resolve('$.manual_review')}"
    },
    {
      "dictionary": "firstname",
      "value": "#{PAYLOAD.resolve('$.firstname')}"
    },
    {
      "dictionary": "lastname",
      "value": "#{PAYLOAD.resolve('$.lastname')}"
    },
    {
      "dictionary": "service_address",
      "value": "#{PAYLOAD.resolve('$.service_address')}"
    }
  ]
}
]]>
            </config>
        </dataFeed>
    </dataFeeds>
</apiroConf>

This transformer reads the petros.pdf PDF document and converts it into a JSON structure. Data points can then source values from this structure in the same way as from a regular JSON data feed definition.

Existing Transformers

Here are the existing Apiro transformers:

@TODO TP: There are two more data transformer definitions, ENCRYPT and DECRYPT, which use a CryptoManager scriptable to encrypt or decrypt a payload. A sample config would be:

"transformers": [ { "name": "ENCRYPT_PL", "entity": "ENCRYPT", "config": { "cryptoManager": { "name": "CM_1", "entity": "AWS_CRYPTMAN_1" } } } ]

FYI: I have hooked up the ability to use custom secret managers other than SYS to encode secrets.

SYS is just one option for getting secrets.

So, besides ${SYS:, you can use ${MYSM:, and it will try to locate the globally defined secret manager and use that to encode the secret into the config.