Skip to content
This repository has been archived by the owner on Jan 28, 2025. It is now read-only.

Commit

Permalink
add version 0.2 (#38)
Browse files Browse the repository at this point in the history
* Initial CODE_OF_CONDUCT.md commit

* Initial LICENSE commit

* Initial SECURITY.md commit

* Initial README.md commit

* Initial SUPPORT.md commit

* init repo

* Fix comments on the sync meeting. Overwrite, arrayOperations.first

* Ignore property in unroll path config in resource config

* Add option.validate for removing properties in resource that appear in unroll configurations

* Add a configuration_viewer to show schema

* Add printProperties function for configuration_viewer

* add helper tool command

* ConfigurationViewer: Add options.level and add tests

* refactor the generator

* Throw an exception for nonexistent propertiesGroup and unidentified properties

* Update describe in tests

* add empty object check

* fix properties group generate issue

* Add base properties in showSchema

* ShowSchema: Add test data and change para level to maxDepth

* add contributing md

* add header to js files

* refactor yml tool code

* add more tests

* rename file

* Set up CI with Azure Pipelines

[skip ci]

* Update security_scan.yml for Azure Pipelines

* Update security_scan.yml for Azure Pipelines

* Update security_scan.yml for Azure Pipelines

* Update security_scan.yml for Azure Pipelines

* Update ci-pr.yml for Azure Pipelines

* Update ci-pr.yml

* Update security_scan.yml for Azure Pipelines

* Update security_scan.yml for Azure Pipelines

* Update security_scan.yml for Azure Pipelines

* Update security_scan.yml for Azure Pipelines

* Update security_scan.yml for Azure Pipelines

* Update security_scan.yml for Azure Pipelines

* Update security_scan.yml for Azure Pipelines

* Update fhirServiceToCdm.json

* Add Headers in .csv files

* Update InitialzeHeader methods

* Remove personal-used config

* Update fhirServiceToCdm.json

Update batch node size option

* Added documentation

* Use same name for file and unroll path table

* ColumnName: location to FhirPath

* Add ParentPath column for unroll schema

* Remove useless changes

* yaml tool improvement

* Extract GetParentLocation

* Fix null in GetParentLocation

* Added sample config files, masked images

* add missing files

* add one more case

* Removed extraneous characters; fixed typo

* add nuget config

* Update release.yml for Azure Pipelines

* Update release.yml for Azure Pipelines

* Update release.yml for Azure Pipelines

* Update release.yml for Azure Pipelines

* Update release.yml for Azure Pipelines

* Update release.yml for Azure Pipelines

* Update release.yml for Azure Pipelines

* Update release.yml for Azure Pipelines

* Update release.yml for Azure Pipelines

* Update release.yml for Azure Pipelines

* add column missing in schema viewer

* add column missing in schema viewer

* Update pipeline package link to released package

Update pipeline package link to released package

* Add pipeline from CDM to synapse db

Add pipeline from CDM to synapse db

* Add recreate table flag in the ADF pipeline

Add recreate table flag in the ADF pipeline

* Update for security check

1. Remove unused nuget feed.
2. Remove SAS token in download link
3. Add Semmle

* Update cdm-to-synapse.md

* Global Readiness Manifest file GeoPol.xml

* fix split resource files support (#29)

Co-authored-by: Microsoft Open Source <microsoftopensource@users.noreply.github.com>
Co-authored-by: skwwt <tong.wu@live.com>
Co-authored-by: QuanWanxx <68055742+QuanWanxx@users.noreply.github.com>
Co-authored-by: Ranvijay Kumar <ranku@microsoft.com>
Co-authored-by: ginalee-dotcom <68250213+ginalee-dotcom@users.noreply.github.com>
Co-authored-by: Joe Rowan <joerow@microsoft.com>
  • Loading branch information
7 people authored Aug 27, 2021
1 parent b89b08f commit 82be930
Show file tree
Hide file tree
Showing 11 changed files with 1,310 additions and 18 deletions.
24 changes: 24 additions & 0 deletions GeoPol.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<!-- List of files and folders to include, plus exception lists, for GeoPolitical scanning of this repo. Contact Joerow for details. -->
<!-- Consult Global Readiness Notebook @ aka.ms/NExTGeoPol for further details -->
<!-- This file is consumed by scripts in the 'health-localization' repo under the LocBuild\GeoPolitical folder(s) -->
<!DOCTYPE varsdefined [
<!ENTITY GitReposFolder "C:\GITs\Repos">
<!ENTITY GitRepoName "FHIR-Analytics-Pipelines">
]>

<GeoPol_Folders>
<!-- List of Folders to include for GeoPolitical scanning -->
<GitRepoName>&GitRepoName;</GitRepoName>
<Component Include="List here folders to Include in a GeoPol Scan">
<!-- . means the entire repo -->
<!-- Use a backslash (\) to separate folder path segments, e.g. C:\Temp\Git\ -->
<IncludeFolder>.</IncludeFolder>
</Component>
<Component Exclude="List exceptions here to not be scanned, that have been included above">
<!-- Make sure to consult http://aka.ms/NExtStart if excluding 3rd party or OSS components -->
<!-- Use back slash \ to indicate folder path e.g. src\external\ -->
<ExcludeFolder>.gitignore</ExcludeFolder>
<ExcludeFolder>GeoPol.xml</ExcludeFolder>
<!-- 3rd Party Libraries section below -->
</Component>
</GeoPol_Folders>
25 changes: 20 additions & 5 deletions Microsoft.Health.Fhir.Transformation.BatchExecutor/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
// -------------------------------------------------------------------------------------------------

using System;
using System.Collections.Generic;
using System.CommandLine;
using System.CommandLine.Binding;
using System.CommandLine.Invocation;
using System.IO;
using System.Threading.Tasks;
using Azure.Identity;
using Microsoft.CommonDataModel.ObjectModel.Cdm;
Expand All @@ -15,6 +17,7 @@
using Microsoft.Health.Fhir.Transformation.Cdm;
using Microsoft.Health.Fhir.Transformation.Cdm.BatchExecutor;
using Microsoft.Health.Fhir.Transformation.Core;
using Newtonsoft.Json;

namespace Microsoft.Health.Fhir.Transformation.BatchExecutor
{
Expand Down Expand Up @@ -51,15 +54,18 @@ static async Task Main(string[] args)
ClientSecretCredential credential = GetClientSecretCredential(tenantId, clientId, clientSecret);

StorageDefinitionLoader configLoader = new StorageDefinitionLoader(GetStorageServiceEndpoint(adlsAccount), configurationContainer, credential, maxDepth);
TabularMappingDefinition[] mappings = configLoader.Load();
TabularMappingDefinition[] mappings = configLoader.Load();

AdlsCsvSink sink = new AdlsCsvSink(adlsAccount, cdmFileSystem, credential);
await sink.InitAsync();
await sink.CreateFileSystemClientIfNotExistAsync();

CdmCorpusDefinition defination = InitAdlscdmCorpusDefinition(adlsAccount, "/" + cdmFileSystem, tenantId, clientId, clientSecret);
CdmSchemaGenerator cdmSchemaGenerator = new CdmSchemaGenerator(defination);
await cdmSchemaGenerator.InitializeCdmFolderAsync(mappings, "adls");
List<string> entities = await cdmSchemaGenerator.InitializeCdmFolderAsync(mappings, "adls");

WriteActivityOutputs(entities);

logger.LogInformation("Generate CDM schema completed.");
});
rootCommand.AddCommand(generateSchemaCommand);
Expand All @@ -86,16 +92,18 @@ static async Task Main(string[] args)
StorageDefinitionLoader configLoader = new StorageDefinitionLoader(GetStorageServiceEndpoint(adlsAccount), configurationContainer, credential, maxDepth);
TabularMappingDefinition[] mappings = configLoader.Load();

ISource source = new StorageBlobNdjsonSource(new Uri(inputBlobUri), credential)
Uri inputUri = new Uri(inputBlobUri);
ISource source = new StorageBlobNdjsonSource(inputUri, credential)
{
ConcurrentCount = Environment.ProcessorCount * 2
};

string fileName = Path.GetFileNameWithoutExtension(inputUri.AbsolutePath);
AdlsCsvSink sink = new AdlsCsvSink(adlsAccount, cdmFileSystem, credential)
{
CsvFilePath = (string tableName) =>
{
return $"data/Local{tableName}/partition-data-{operationId}.csv";
return $"data/Local{tableName}/partition-data-{fileName}-{operationId}.csv";
},
ConcurrentCount = Environment.ProcessorCount * 2
};
Expand All @@ -122,6 +130,13 @@ static async Task Main(string[] args)
await rootCommand.InvokeAsync(args);
}

/// <summary>
/// Serializes the given entity names to JSON under a single "Entities" key and
/// writes the result to the file "outputs.json" (relative to the current directory).
/// </summary>
/// <param name="entities">Entity names to store under the "Entities" key.</param>
/// <remarks>
/// NOTE(review): presumably "outputs.json" is picked up by whatever activity launched
/// this executable - confirm against the calling pipeline.
/// </remarks>
private static void WriteActivityOutputs(List<string> entities)
{
    // A plain string-keyed dictionary is serialized as a JSON object, one property per key.
    var payload = new Dictionary<string, object>
    {
        { "Entities", entities },
    };

    string json = JsonConvert.SerializeObject(payload);
    File.WriteAllText("outputs.json", json);
}

private static CdmCorpusDefinition InitAdlscdmCorpusDefinition(string account, string fileSystemRoot, string tenantId, string clientId, string secret)
{
var cdmCorpus = new CdmCorpusDefinition();
Expand Down
15 changes: 11 additions & 4 deletions Microsoft.Health.Fhir.Transformation.Cdm/CdmSchemaGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,20 @@ public CdmSchemaGenerator(CdmCorpusDefinition cdmCorpusDefinition)
_cdmCorpusDefinition = cdmCorpusDefinition;
}

public async Task InitializeCdmFolderAsync(IEnumerable<TabularMappingDefinition> tabularMappings, string rootFolder = "local")
public async Task<List<string>> InitializeCdmFolderAsync(IEnumerable<TabularMappingDefinition> tabularMappings, string rootFolder = "local")
{
_cdmCorpusDefinition.SetEventCallback(null, CdmStatusLevel.None);
CdmManifestDefinition manifestAbstract = _cdmCorpusDefinition.MakeObject<CdmManifestDefinition>(CdmObjectType.ManifestDef, "tempAbstract");

var localRoot = _cdmCorpusDefinition.Storage.FetchRootFolder(rootFolder);
localRoot.Documents.Add(manifestAbstract);

await BuildCdmEntitys(_cdmCorpusDefinition, manifestAbstract, localRoot, tabularMappings);
List<string> entities = await BuildCdmEntitys(_cdmCorpusDefinition, manifestAbstract, localRoot, tabularMappings);
CdmManifestDefinition manifestResolved = await CreateResolvedManifest(manifestAbstract);
await CreateDataPatitions(_cdmCorpusDefinition, manifestResolved);
await manifestResolved.SaveAsAsync($"{manifestResolved.ManifestName}.manifest.cdm.json", true);

return entities;
}

public static CdmCorpusDefinition InitLocalcdmCorpusDefinition(string cdmRoot)
Expand Down Expand Up @@ -69,8 +71,9 @@ private static async Task CreateDataPatitions(CdmCorpusDefinition cdmCorpus, Cdm
}
}

private static async Task BuildCdmEntitys(CdmCorpusDefinition cdmCorpus, CdmManifestDefinition manifestAbstract, CdmFolderDefinition localRoot, IEnumerable<TabularMappingDefinition> tabularMappings)
private static async Task<List<string>> BuildCdmEntitys(CdmCorpusDefinition cdmCorpus, CdmManifestDefinition manifestAbstract, CdmFolderDefinition localRoot, IEnumerable<TabularMappingDefinition> tabularMappings)
{
List<string> results = new List<string>();
foreach (TabularMappingDefinition tabularMapping in tabularMappings)
{
var entity = cdmCorpus.MakeObject<CdmEntityDefinition>(CdmObjectType.EntityDef, tabularMapping.TableName, false);
Expand All @@ -86,9 +89,13 @@ private static async Task BuildCdmEntitys(CdmCorpusDefinition cdmCorpus, CdmMani
entityDoc.Definitions.Add(entity);
localRoot.Documents.Add(entityDoc, entityDoc.Name);

var resolvedEntity = await entity.CreateResolvedEntityAsync($"Local{tabularMapping.TableName}");
string entityName = $"Local{tabularMapping.TableName}";
var resolvedEntity = await entity.CreateResolvedEntityAsync(entityName);
manifestAbstract.Entities.Add(resolvedEntity);
results.Add(entityName);
}

return results;
}

private static CdmTypeAttributeDefinition CreateEntityAttributeWithPurposeAndDataType(CdmCorpusDefinition cdmCorpus, string attributeName, string purpose, string dataType)
Expand Down
34 changes: 34 additions & 0 deletions Scripts/DeployCdmToSynapsePipeline.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<#
.SYNOPSIS
Deploys the CDM-to-Synapse template once for every entity listed in a JSON configuration file.

.DESCRIPTION
Reads the configuration file, then runs one resource group deployment per entry in
TemplateParameters.Entities. Each deployment receives the shared template parameters plus
the current entity as CdmLocalEntity. The script stops at the first error, so "Complete!"
is printed only when the configuration was read and every deployment call succeeded.

.PARAMETER Config
Path to the JSON configuration file. The script reads ResourceGroup, TemplateFilePath and
TemplateParameters (DataFactoryName, SynapseWorkspace, DedicatedSqlPool, AdlsAccountForCdm,
CdmRootLocation, StagingContainer, Entities) from it.
#>
param (
    [Parameter(Mandatory = $true)]
    [ValidateNotNullOrEmpty()]
    [string]$Config
)

# Promote non-terminating errors (unreadable config, failed deployment) to terminating ones
# so the script cannot fall through to "Complete!" after a failure.
$ErrorActionPreference = 'Stop'

# -Raw hands the whole file to ConvertFrom-Json as one string instead of a stream of lines.
$configContent = Get-Content -Path $Config -Raw | ConvertFrom-Json
$parameters = $configContent.TemplateParameters
$entities = $parameters.Entities

# Deploy CDM to synapse pipelines

Write-Host "Deploying..."
$count = 0
foreach ($entity in $entities) {
    $count++
    Write-Host "Deploy the $entity [$($count)/$($entities.count)]"

    # Shared parameters come from the config; only CdmLocalEntity changes per iteration.
    $templateParameters = @{
        DataFactoryName   = $parameters.DataFactoryName
        SynapseWorkspace  = $parameters.SynapseWorkspace
        DedicatedSqlPool  = $parameters.DedicatedSqlPool
        AdlsAccountForCdm = $parameters.AdlsAccountForCdm
        CdmRootLocation   = $parameters.CdmRootLocation
        StagingContainer  = $parameters.StagingContainer
        CdmLocalEntity    = $entity
    }

    # Every iteration reuses the same deployment name.
    $deploymentArguments = @{
        Name                    = 'DeployLocalTemplate'
        ResourceGroupName       = $configContent.ResourceGroup
        TemplateFile            = $configContent.TemplateFilePath
        TemplateParameterObject = $templateParameters
        Verbose                 = $true
    }

    New-AzResourceGroupDeployment @deploymentArguments
}
Write-Host "Complete!"
13 changes: 13 additions & 0 deletions Scripts/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"ResourceGroup": "",
"TemplateFilePath": "../Templates/cdmToSynapse.json",
"TemplateParameters": {
"DataFactoryName": "",
"SynapseWorkspace": "",
"DedicatedSqlPool": "",
"AdlsAccountForCdm": "",
"CdmRootLocation": "cdm",
"StagingContainer": "adfstaging",
"Entities": ["LocalPatient", "LocalPatientAddress"]
}
}
Loading

0 comments on commit 82be930

Please sign in to comment.