Table of Contents

Search

  1. Preface
  2. Introduction to Data Engineering Administration
  3. Authentication
  4. Running Mappings on a Cluster with Kerberos Authentication
  5. Authorization
  6. Cluster Configuration
  7. Cloud Provisioning Configuration
  8. Data Integration Service Processing
  9. Appendix A: Connections Reference
  10. Appendix B: Monitoring REST API

Data Engineering Administrator Guide

Data Engineering Administrator Guide

Sample Retrieve Mapping Execution Steps

Sample Retrieve Mapping Execution Steps

The sample use case is to use the script to retrieve the details of the mapping execution steps for the Spark environment.
You can use the REST API to retrieve information about the mapping execution steps with the following request URL for a mapping with Job ID as _TNoO9ELEeiimY76kFyfuw:
<RESTOperationsHubService_Host>:<RESTOperationsHubService_Port>/RestOperationsHub/services/v1/MappingService/MappingExecutionSteps(jobId='_TNoO9ELEeiimY76kFyfuw',fetchScripts=true)

Mapping Execution Steps Output

{ "@odata.context": "$metadata#MappingExecutionSteps/$entity", "jobId": "_TNoO9ELEeiimY76kFyfuw", "fetchScripts": true, "executionStepStats": [ { "@odata.type": "#OData.Domain.MSparkApplicationStepStat", "endTime": 1539671244359, "startTime": 1539671066452, "errorCode": "", "errorMessage": "", "name": "InfaSpark0", "state": "COMPLETED", "subExecutionStepStat": [], "jobTrackerUrl": "https://ivlhdp621.informatica.com:8090/proxy/application_1539263790092_1418", "query": "package com.informatica.exec\n\nimport com.informatica.bootstrap.functions._\nimport com.informatica.bootstrap.InfaParams._\nimport com.informatica.bootstrap.InfaStreaming.writeToKafka\nimport com.informatica.products.infatransform.spark.boot._\nimport com.informatica.bootstrap._\nimport com.informatica.hive._\nimport com.informatica.bootstrap.{JsonProtocol => JP}\nimport org.apache.spark._\nimport org.apache.spark.rdd._\nimport org.apache.spark.storage.StorageLevel._\nimport org.apache.spark.sql._\nimport org.apache.spark.sql.types._\nimport org.apache.spark.sql.functions._\nimport org.apache.spark.sql.functions.{ broadcast => infabroadcast }\nimport org.apache.spark.sql.infa.expressions._\nimport java.io._\nimport java.sql.Timestamp\nimport scala.reflect.ClassTag\nimport org.apache.spark.sql.catalyst.expressions.Caster\nimport org.apache.spark.sql.catalyst.expressions.JavaCaster\nimport com.informatica.bootstrap.JavaTx._\nimport org.apache.spark.Accumulator\nimport org.apache.spark.util.LongAccumulator\nimport org.apache.spark.scheduler.SparkListener\nimport org.apache.spark.SparkEnv\nimport org.apache.spark.sql.Row\n\nobject InfaSpark0 {\n def main(s:Array[String]) {\n val sc = SparkContextLoader.getSparkContext\n val sqlContext = SparkContextLoader.getSQLContext\n val ls = new LiveStream(sc.getConf)\n ls.relay(JP.sparkConfToJson(sc.getConf)) \n ls.relay(JP.hadoopConfToJson(sc.hadoopConfiguration)) \n val lis = new InfaListener(ls,\"TAG\")\n sc.addSparkListener(lis) \nsqlContext.sparkSession.experimental.extraPreprocessing = new InfaTaggingRules().rules\n val accs = List()\n ls.relay(JP.sparkAppDetailsToJson(sc.getConf, accs)) \n lis.accumulators = accs\n import sqlContext.implicits._\n import org.apache.spark.sql.functions.{stddev_samp, var_samp}\n val icast = caster(\"MM/DD/YYYY HH24:MI:SS\")\n val acast = adapterCaster()\n val jcast = JavaCaster()\n\n try {\n Tuple2(sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"0\"))), sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"1\"))));\n Tuple2(sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"2\"))), sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"3\"))));\n Tuple2(sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"4\"))), sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"5\"))));\n val v0 = infabroadcast(asBlock(sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"6\")))).tag(\"SRC_Read_students_5\").itoDF(\"m\")).itoDF;\n val v1 = updatePartitions(asBlock(sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"7\")))).tag(\"SRC_Read_students_HDFS_src\").itoDF(\"d\"), v0);\n val v2 = v1.join(v0, v0(0).===(v1(0)), \"inner\").itoDF(\"m\");\n val v3 = updatePartitions(asBlock(sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"8\")))).tag(\"SRC_Read_student\").itoDF(\"d\"), v2);\n val v4 = v3.join(v2, v2(1).===(v3(0)), \"inner\").itoDF;\n val v5 = DataTypes.createDecimalType(28, 0);\n val v6 = DataTypes.createDecimalType(18, 0);\n asBlock(sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"9\"))), v4.iselect(icast(icast(v4(2), v5), v6), v4(3), v4(4), icast(icast(v4(5), v5), v6)).itoDF(\"TGT_\").tag(\"TGT_Write_HDFSAppendTarget\").itoDF(\"c\").createOrReplaceTempView(\"tbl0\"));\n } finally {\n sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"10\")));\n sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"11\")));\n sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"12\")));\n }\n sc.stop\n}\n}\n[0] -> [DROP TABLE IF EXISTS `default`.`w7939778750618549156_infa_read_students_hdfs_src_hdfstgtappend_multipartition_sparkmode`]\n[1] -> [CREATE TABLE `default`.`w7939778750618549156_infa_read_students_hdfs_src_hdfstgtappend_multipartition_sparkmode` (`col0` INT, `col1` STRING, `col2` STRING, `col3` INT, `col4` STRING) ROW FORMAT SERDE 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFASerDe' STORED AS INPUTFORMAT 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFAInputFormat' OUTPUTFORMAT 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFAOutputFormat' LOCATION 'hdfs://nameservice1//tmp/SPARK_impUser1/sess7939778750618549156//W7939778750618549156_infa_Read_students_HDFS_src_HDFSTgtAppend_MultiPartition_SparkMode' TBLPROPERTIES ('infa.columns.types'='int,string,string,int,string', 'pwx.mapping.file.path'='./Read_students_HDFS_src_MAPPING_37960411407997671_37960411786739094.bin', 'auto.purge'='true', 'infa.columns'='col0,col1,col2,col3,col4')]\n[2] -> [DROP TABLE IF EXISTS `default`.`w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode`]\n[3] -> [CREATE TABLE `default`.`w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode` (`col0` INT, `col1` STRING, `col2` STRING, `col3` INT) ROW FORMAT SERDE 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFASerDe' STORED AS INPUTFORMAT 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFAInputFormat' OUTPUTFORMAT 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFAOutputFormat' LOCATION 'hdfs://nameservice1//tmp/SPARK_impUser1/sess7939778750618549156//W7939778750618549156_INFACOPY_Read_students_5_HDFSTgtAppend_MultiPartition_SparkMode' TBLPROPERTIES ('infa.columns.types'='int,string,string,int', 'pwx.mapping.file.path'='./Read_students_5_MAPPING_37960411392603831_37960411887963169.bin', 'auto.purge'='true', 'infa.columns'='col0,col1,col2,col3')]\n[4] -> [DROP TABLE IF EXISTS `default`.`w7939778750618549156_infa_write_hdfsappendtarget_hdfstgtappend_multipartition_sparkmode`]\n[5] -> [CREATE TABLE `default`.`w7939778750618549156_infa_write_hdfsappendtarget_hdfstgtappend_multipartition_sparkmode` (`col0` DECIMAL(18, 0), `col1` STRING, `col2` STRING, `col3` DECIMAL(18, 0)) ROW FORMAT SERDE 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFASerDe' STORED AS INPUTFORMAT 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFAInputFormat' OUTPUTFORMAT 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFAOutputFormat' LOCATION 'hdfs://nameservice1//tmp/SPARK_impUser1/sess7939778750618549156//W7939778750618549156_infa_Write_HDFSAppendTarget_HDFSTgtAppend_MultiPartition_SparkMode' TBLPROPERTIES ('infa.columns.types'='decimal(18,0),string,string,decimal(18,0)', 'pwx.mapping.file.path'='./Write_HDFSAppendTarget_MAPPING_37960411526174778_37960411903682194.bin', 'pwx.skip.serialization'='true', 'auto.purge'='true', 'infa.columns'='col0,col1,col2,col3')]\n[6] -> [SELECT `w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode`.`col0` as a0, `w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode`.`col1` as a1, `w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode`.`col2` as a2, `w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode`.`col3` as a3 FROM `default`.`w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode`]\n[7] -> [SELECT `w7939778750618549156_infa_read_students_hdfs_src_hdfstgtappend_multipartition_sparkmode`.`col0` as a0 FROM `default`.`w7939778750618549156_infa_read_students_hdfs_src_hdfstgtappend_multipartition_sparkmode`]\n[8] -> [SELECT alias.id as a0 FROM DEFAULT.student alias]\n[9] -> [INSERT OVERWRITE TABLE `default`.`w7939778750618549156_infa_write_hdfsappendtarget_hdfstgtappend_multipartition_sparkmode` SELECT tbl0.c0 as a0, tbl0.c1 as a1, tbl0.c2 as a2, tbl0.c3 as a3 FROM tbl0]\n[10] -> [DROP TABLE IF EXISTS `default`.`w7939778750618549156_infa_write_hdfsappendtarget_hdfstgtappend_multipartition_sparkmode`]\n[11] -> [DROP TABLE IF EXISTS `default`.`w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode`]\n[12] -> [DROP TABLE IF EXISTS `default`.`w7939778750618549156_infa_read_students_hdfs_src_hdfstgtappend_multipartition_sparkmode`]" } ] }

0 COMMENTS

We’d like to hear from you!