samples/DynamoDBImportCSV/CSVtoDynamoDB.json — 50 changes: 5 additions & 45 deletions
@@ -2,29 +2,13 @@
   "objects": [
     {
       "myComment" : "Activity used to run the hive script to import CSV data",
-      "output": {
-        "ref": "DataNodeId_cnlSW"
-      },
-      "input": {
-        "ref": "DataNodeId_1ERqq"
-      },
-      "name": "TableRestoreActivity",
+      "name": "TableImportActivity",
       "hiveScript": "DROP TABLE IF EXISTS tempHiveTable;\n\nDROP TABLE IF EXISTS s3TempTable;\n\nCREATE EXTERNAL TABLE tempHiveTable (#{myDDBColDefn})\nSTORED BY 'org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler' \nTBLPROPERTIES (\"dynamodb.table.name\" = \"#{myDDBTableName}\", \"dynamodb.column.mapping\" = \"#{myDDBTableColMapping}\");\n \nCREATE EXTERNAL TABLE s3TempTable (#{myS3ColMapping})\nROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\\\n' LOCATION '#{myInputS3Loc}';\n \nINSERT OVERWRITE TABLE tempHiveTable SELECT * FROM s3TempTable;",
-      "id": "TableRestoreActivity",
-      "runsOn": { "ref" : "EmrClusterForRestore" },
+      "id": "TableImportActivity",
+      "runsOn": { "ref" : "EmrClusterForImport" },
       "stage": "false",
       "type": "HiveActivity"
     },
-    {
-      "myComment" : "The DynamoDB table from which we need to import data from",
-      "dataFormat": {
-        "ref": "DDBExportFormat"
-      },
-      "name": "DynamoDB",
-      "id": "DataNodeId_1ERqq",
-      "type": "DynamoDBDataNode",
-      "tableName": "#{myDDBTableName}"
-    },
     {
       "failureAndRerunMode": "CASCADE",
       "resourceRole": "DataPipelineDefaultResourceRole",
@@ -35,38 +19,14 @@
       "id": "Default"
     },
     {
-      "name": "EmrClusterForRestore",
+      "name": "EmrClusterForImport",
       "coreInstanceType": "m1.medium",
       "coreInstanceCount": "1",
       "masterInstanceType": "m1.medium",
       "releaseLabel": "emr-4.4.0",
-      "id": "EmrClusterForRestore",
+      "id": "EmrClusterForImport",
       "type": "EmrCluster",
       "terminateAfter": "2 Hours"
     },
-    {
-      "myComment" : "The S3 path from which we import data from",
-      "directoryPath": "#{myInputS3Loc}",
-      "dataFormat": {
-        "ref": "DataFormatId_xqWRk"
-      },
-      "name": "S3DataNode",
-      "id": "DataNodeId_cnlSW",
-      "type": "S3DataNode"
-    },
-    {
-      "myComment" : "Format for the S3 Path",
-      "name": "DefaultDataFormat1",
-      "column": "not_used STRING",
-      "id": "DataFormatId_xqWRk",
-      "type": "CSV"
-    },
-    {
-      "myComment" : "Format for the DynamoDB table",
-      "name": "DDBExportFormat",
-      "id": "DDBExportFormat",
-      "column": "not_used STRING",
-      "type": "DynamoDBExportDataFormat"
-    }
   ],
   "parameters": [
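For readability, here is the hiveScript embedded in TableImportActivity above, unescaped into plain HiveQL. The #{...} placeholders are pipeline parameters that AWS Data Pipeline resolves at runtime, and since the activity sets "stage": "false", the script reads S3 and writes DynamoDB directly rather than through staged data nodes:

```sql
-- Hive script run by TableImportActivity (unescaped from the JSON diff above).
-- #{...} placeholders are Data Pipeline parameters substituted at runtime.

DROP TABLE IF EXISTS tempHiveTable;
DROP TABLE IF EXISTS s3TempTable;

-- Hive table backed directly by the target DynamoDB table.
CREATE EXTERNAL TABLE tempHiveTable (#{myDDBColDefn})
STORED BY 'org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler'
TBLPROPERTIES (
  "dynamodb.table.name" = "#{myDDBTableName}",
  "dynamodb.column.mapping" = "#{myDDBTableColMapping}"
);

-- Hive table over the source CSV files in S3.
CREATE EXTERNAL TABLE s3TempTable (#{myS3ColMapping})
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
LOCATION '#{myInputS3Loc}';

-- Copy every CSV row into DynamoDB via the storage handler.
INSERT OVERWRITE TABLE tempHiveTable SELECT * FROM s3TempTable;
```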
samples/DynamoDBImportCSV/README.md — 20 changes: 10 additions & 10 deletions
@@ -1,18 +1,18 @@
-#DynamoDB to CSV import
+# DynamoDB import from CSV
 
-##About the sample
-The pipeline definition is used to import DynamoDB data to a CSV format.
+## About the sample
+
+The pipeline definition is used to import CSV data into a DynamoDB table.
 
-##Running the pipeline
+## Running the pipeline
 
 Example DynamoDB table with keys: id
 
 User needs to provide:
 
-1. Input S3 folder: The s3 folder prefix from which the CSV data is to be imported.
-2. DynamoDB read throughput ratio: The throughput to be used for the import operation.
-3. DynamoDB table name: The table name from which we need to import the data.
-4. S3 Column Mappings: A comma seperated column definitions. For example, customer_id string, income string, demographics string, financial string
-4. Dynamodb Column Mappings: A comma seperated column definitions. For example, customer_id string, income string, demographics string, financial string
-5. S3 to DynamoDB Column Mapping: A comma separated mapping of S3 to DynamoDB for e.g. customer_id:customer_id,income:income,demographics:demographics,financial:financial. Please take care of not using spaces in between the commas.
+1. Input S3 folder: The source S3 folder prefix from which the CSV data is to be imported.
+2. DynamoDB table name: The name of the target DynamoDB table.
+3. S3 Column Mappings: Comma-separated column definitions. For example: customer_id string, income string, demographics string, financial string.
+4. DynamoDB Column Mappings: Comma-separated column definitions. For example: customer_id string, income string, demographics string, financial string.
+5. S3 to DynamoDB Column Mapping: A comma-separated mapping from S3 columns to DynamoDB attributes, e.g. customer_id:customer_id,income:income,demographics:demographics,financial:financial. Do not put spaces between the comma-separated entries.
 6. Log Uri: S3 log path to capture the pipeline logs.
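These README inputs appear to correspond to the parameter placeholders referenced by the hive script: myInputS3Loc, myDDBTableName, myS3ColMapping, myDDBColDefn, and myDDBTableColMapping. Below is a minimal sketch of a parameter-values section with made-up sample data; the exact ids (and the log-URI parameter, omitted here because the parameters section of the definition is collapsed in this diff) should be checked against the full CSVtoDynamoDB.json:

```json
{
  "values": {
    "myInputS3Loc": "s3://example-bucket/csv-input/",
    "myDDBTableName": "Customers",
    "myS3ColMapping": "customer_id string, income string, demographics string, financial string",
    "myDDBColDefn": "customer_id string, income string, demographics string, financial string",
    "myDDBTableColMapping": "customer_id:customer_id,income:income,demographics:demographics,financial:financial"
  }
}
```

Values in this form can also be supplied on the command line when activating the sample, e.g. `aws datapipeline put-pipeline-definition --pipeline-id <id> --pipeline-definition file://CSVtoDynamoDB.json --parameter-values myDDBTableName=Customers ...` (sample table name and paths are illustrative only).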