Table of Contents

  1. Preface
  2. Introduction to Big Data Management Administration
  3. Authentication
  4. Running Mappings on a Cluster with Kerberos Authentication
  5. Authorization
  6. Cluster Configuration
  7. Cloud Provisioning Configuration
  8. Data Integration Service Processing
  9. Connections
  10. Multiple Blaze Instances on a Cluster
  11. Monitoring REST API

Big Data Management Administrator Guide

Sample Retrieve Mapping Execution Steps

In this sample use case, you use the script to retrieve the details of the mapping execution steps for a mapping that runs in the Spark environment.
To retrieve information about the mapping execution steps through the REST API, use the following request URL for a mapping with the job ID _TNoO9ELEeiimY76kFyfuw:
<RESTOperationsHubService_Host>:<RESTOperationsHubService_Port>/RestOperationsHub/services/v1/MappingService/MappingExecutionSteps(jobId='_TNoO9ELEeiimY76kFyfuw',fetchScripts=true)
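
The following is a minimal sketch, not part of the guide, of how you might issue this request with the Python requests library. The host name, port, scheme, and credentials are placeholders; substitute the values for your REST Operations Hub Service and the authentication scheme configured in your domain.

# Sketch only: call the MappingExecutionSteps endpoint for one job ID.
import requests

REST_OPSHUB_HOST = "opshub.example.com"   # assumption: replace with your REST Operations Hub host
REST_OPSHUB_PORT = 8085                   # assumption: replace with your REST Operations Hub port
JOB_ID = "_TNoO9ELEeiimY76kFyfuw"         # job ID from the sample use case

url = (
    f"https://{REST_OPSHUB_HOST}:{REST_OPSHUB_PORT}"
    "/RestOperationsHub/services/v1/MappingService/"
    f"MappingExecutionSteps(jobId='{JOB_ID}',fetchScripts=true)"
)

# Authentication and TLS settings depend on your domain configuration;
# basic authentication is shown here only as an illustrative placeholder.
response = requests.get(url, auth=("Administrator", "password"), verify=False)
response.raise_for_status()
print(response.json())

Because the request sets fetchScripts=true, the response includes the generated Spark script for each execution step, as shown in the sample output below.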

Mapping Execution Steps Output

{ "@odata.context": "$metadata#MappingExecutionSteps/$entity", "jobId": "_TNoO9ELEeiimY76kFyfuw", "fetchScripts": true, "executionStepStats": [ { "@odata.type": "#OData.Domain.MSparkApplicationStepStat", "endTime": 1539671244359, "startTime": 1539671066452, "errorCode": "", "errorMessage": "", "name": "InfaSpark0", "state": "COMPLETED", "subExecutionStepStat": [], "jobTrackerUrl": "https://ivlhdp621.informatica.com:8090/proxy/application_1539263790092_1418", "query": "package com.informatica.exec\n\nimport com.informatica.bootstrap.functions._\nimport com.informatica.bootstrap.InfaParams._\nimport com.informatica.bootstrap.InfaStreaming.writeToKafka\nimport com.informatica.products.infatransform.spark.boot._\nimport com.informatica.bootstrap._\nimport com.informatica.hive._\nimport com.informatica.bootstrap.{JsonProtocol => JP}\nimport org.apache.spark._\nimport org.apache.spark.rdd._\nimport org.apache.spark.storage.StorageLevel._\nimport org.apache.spark.sql._\nimport org.apache.spark.sql.types._\nimport org.apache.spark.sql.functions._\nimport org.apache.spark.sql.functions.{ broadcast => infabroadcast }\nimport org.apache.spark.sql.infa.expressions._\nimport java.io._\nimport java.sql.Timestamp\nimport scala.reflect.ClassTag\nimport org.apache.spark.sql.catalyst.expressions.Caster\nimport org.apache.spark.sql.catalyst.expressions.JavaCaster\nimport com.informatica.bootstrap.JavaTx._\nimport org.apache.spark.Accumulator\nimport org.apache.spark.util.LongAccumulator\nimport org.apache.spark.scheduler.SparkListener\nimport org.apache.spark.SparkEnv\nimport org.apache.spark.sql.Row\n\nobject InfaSpark0 {\n def main(s:Array[String]) {\n val sc = SparkContextLoader.getSparkContext\n val sqlContext = SparkContextLoader.getSQLContext\n val ls = new LiveStream(sc.getConf)\n ls.relay(JP.sparkConfToJson(sc.getConf)) \n ls.relay(JP.hadoopConfToJson(sc.hadoopConfiguration)) \n val lis = new InfaListener(ls,\"TAG\")\n sc.addSparkListener(lis) \nsqlContext.sparkSession.experimental.extraPreprocessing = new InfaTaggingRules().rules\n val accs = List()\n ls.relay(JP.sparkAppDetailsToJson(sc.getConf, accs)) \n lis.accumulators = accs\n import sqlContext.implicits._\n import org.apache.spark.sql.functions.{stddev_samp, var_samp}\n val icast = caster(\"MM/DD/YYYY HH24:MI:SS\")\n val acast = adapterCaster()\n val jcast = JavaCaster()\n\n try {\n Tuple2(sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"0\"))), sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"1\"))));\n Tuple2(sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"2\"))), sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"3\"))));\n Tuple2(sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"4\"))), sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"5\"))));\n val v0 = infabroadcast(asBlock(sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"6\")))).tag(\"SRC_Read_students_5\").itoDF(\"m\")).itoDF;\n val v1 = updatePartitions(asBlock(sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"7\")))).tag(\"SRC_Read_students_HDFS_src\").itoDF(\"d\"), v0);\n val v2 = v1.join(v0, v0(0).===(v1(0)), \"inner\").itoDF(\"m\");\n val v3 = updatePartitions(asBlock(sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"8\")))).tag(\"SRC_Read_student\").itoDF(\"d\"), v2);\n val v4 = v3.join(v2, v2(1).===(v3(0)), \"inner\").itoDF;\n val v5 = DataTypes.createDecimalType(28, 0);\n val v6 = 
DataTypes.createDecimalType(18, 0);\n asBlock(sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"9\"))), v4.iselect(icast(icast(v4(2), v5), v6), v4(3), v4(4), icast(icast(v4(5), v5), v6)).itoDF(\"TGT_\").tag(\"TGT_Write_HDFSAppendTarget\").itoDF(\"c\").createOrReplaceTempView(\"tbl0\"));\n } finally {\n sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"10\")));\n sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"11\")));\n sqlContext.sql(InfaParams.resolve(InfaParams[String](\"InfaSpark0\", \"12\")));\n }\n sc.stop\n}\n}\n[0] -> [DROP TABLE IF EXISTS `default`.`w7939778750618549156_infa_read_students_hdfs_src_hdfstgtappend_multipartition_sparkmode`]\n[1] -> [CREATE TABLE `default`.`w7939778750618549156_infa_read_students_hdfs_src_hdfstgtappend_multipartition_sparkmode` (`col0` INT, `col1` STRING, `col2` STRING, `col3` INT, `col4` STRING) ROW FORMAT SERDE 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFASerDe' STORED AS INPUTFORMAT 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFAInputFormat' OUTPUTFORMAT 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFAOutputFormat' LOCATION 'hdfs://nameservice1//tmp/SPARK_impUser1/sess7939778750618549156//W7939778750618549156_infa_Read_students_HDFS_src_HDFSTgtAppend_MultiPartition_SparkMode' TBLPROPERTIES ('infa.columns.types'='int,string,string,int,string', 'pwx.mapping.file.path'='./Read_students_HDFS_src_MAPPING_37960411407997671_37960411786739094.bin', 'auto.purge'='true', 'infa.columns'='col0,col1,col2,col3,col4')]\n[2] -> [DROP TABLE IF EXISTS `default`.`w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode`]\n[3] -> [CREATE TABLE `default`.`w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode` (`col0` INT, `col1` STRING, `col2` STRING, `col3` INT) ROW FORMAT SERDE 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFASerDe' STORED AS INPUTFORMAT 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFAInputFormat' OUTPUTFORMAT 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFAOutputFormat' LOCATION 'hdfs://nameservice1//tmp/SPARK_impUser1/sess7939778750618549156//W7939778750618549156_INFACOPY_Read_students_5_HDFSTgtAppend_MultiPartition_SparkMode' TBLPROPERTIES ('infa.columns.types'='int,string,string,int', 'pwx.mapping.file.path'='./Read_students_5_MAPPING_37960411392603831_37960411887963169.bin', 'auto.purge'='true', 'infa.columns'='col0,col1,col2,col3')]\n[4] -> [DROP TABLE IF EXISTS `default`.`w7939778750618549156_infa_write_hdfsappendtarget_hdfstgtappend_multipartition_sparkmode`]\n[5] -> [CREATE TABLE `default`.`w7939778750618549156_infa_write_hdfsappendtarget_hdfstgtappend_multipartition_sparkmode` (`col0` DECIMAL(18, 0), `col1` STRING, `col2` STRING, `col3` DECIMAL(18, 0)) ROW FORMAT SERDE 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFASerDe' STORED AS INPUTFORMAT 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFAInputFormat' OUTPUTFORMAT 'com.informatica.platform.dtm.executor.hive.boot.storagehandler.INFAOutputFormat' LOCATION 'hdfs://nameservice1//tmp/SPARK_impUser1/sess7939778750618549156//W7939778750618549156_infa_Write_HDFSAppendTarget_HDFSTgtAppend_MultiPartition_SparkMode' TBLPROPERTIES ('infa.columns.types'='decimal(18,0),string,string,decimal(18,0)', 'pwx.mapping.file.path'='./Write_HDFSAppendTarget_MAPPING_37960411526174778_37960411903682194.bin', 
'pwx.skip.serialization'='true', 'auto.purge'='true', 'infa.columns'='col0,col1,col2,col3')]\n[6] -> [SELECT `w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode`.`col0` as a0, `w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode`.`col1` as a1, `w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode`.`col2` as a2, `w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode`.`col3` as a3 FROM `default`.`w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode`]\n[7] -> [SELECT `w7939778750618549156_infa_read_students_hdfs_src_hdfstgtappend_multipartition_sparkmode`.`col0` as a0 FROM `default`.`w7939778750618549156_infa_read_students_hdfs_src_hdfstgtappend_multipartition_sparkmode`]\n[8] -> [SELECT alias.id as a0 FROM DEFAULT.student alias]\n[9] -> [INSERT OVERWRITE TABLE `default`.`w7939778750618549156_infa_write_hdfsappendtarget_hdfstgtappend_multipartition_sparkmode` SELECT tbl0.c0 as a0, tbl0.c1 as a1, tbl0.c2 as a2, tbl0.c3 as a3 FROM tbl0]\n[10] -> [DROP TABLE IF EXISTS `default`.`w7939778750618549156_infa_write_hdfsappendtarget_hdfstgtappend_multipartition_sparkmode`]\n[11] -> [DROP TABLE IF EXISTS `default`.`w7939778750618549156_infa_read_students_5_hdfstgtappend_multipartition_sparkmode`]\n[12] -> [DROP TABLE IF EXISTS `default`.`w7939778750618549156_infa_read_students_hdfs_src_hdfstgtappend_multipartition_sparkmode`]" } ] }
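
The following is a minimal sketch, not part of the guide, of how you might read the execution step statistics out of a response like the one above. The field names follow the sample output; the function and variable names are illustrative.

# Sketch only: summarize each execution step in the response payload.
import json

def summarize_steps(payload):
    """Print the state, duration, and generated script size for each execution step."""
    for step in payload.get("executionStepStats", []):
        duration_ms = step["endTime"] - step["startTime"]
        print(f"{step['name']}: {step['state']} in {duration_ms} ms")
        print(f"  tracker: {step['jobTrackerUrl']}")
        # 'query' holds the generated Spark Scala program and the resolved
        # Hive statements because the request was made with fetchScripts=true.
        if step.get("query"):
            print(f"  script length: {len(step['query'])} characters")

# Example usage with the JSON text saved from the REST call:
# with open("mapping_execution_steps.json") as f:
#     summarize_steps(json.load(f))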
