
Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories.

base repository: pravega/flink-connectors
base: 7a3496e286543f8e33e2513c1b2f3f3978a71078
head repository: pravega/flink-connectors
compare: e5b9c0db6f81b4f7b073c98dd787690db319feee
Showing with 7,339 additions and 5,268 deletions.
  1. +7 −1 .codecov.yml
  2. +34 −10 .github/workflows/{build-artifacts.yml → build.yml}
  3. +2 −0 .gitignore
  4. +12 −7 HEADER
  5. +2 −1 NOTICE
  6. +44 −19 README.md
  7. +40 −92 build.gradle
  8. +9 −1 checkstyle/checkstyle.xml
  9. +1 −1 checkstyle/import-control.xml
  10. +7 −1 checkstyle/spotbugs-exclude.xml
  11. +6 −1 checkstyle/spotbugs-include.xml
  12. +1 −1 checkstyle/suppressions.xml
  13. +31 −14 documentation/src/docs/batch.md
  14. +177 −0 documentation/src/docs/catalog.md
  15. +20 −5 documentation/src/docs/configurations.md
  16. +60 −23 documentation/src/docs/dev-guide.md
  17. +18 −19 documentation/src/docs/getting-started.md
  18. +7 −2 documentation/src/docs/metrics.md
  19. +38 −29 documentation/src/docs/overview.md
  20. +125 −0 documentation/src/docs/python.md
  21. +15 −12 documentation/src/docs/quickstart.md
  22. +19 −4 documentation/src/docs/serialization.md
  23. +43 −24 documentation/src/docs/streaming.md
  24. +88 −50 documentation/src/docs/table-api.md
  25. +11 −2 documentation/src/mkdocs.yml
  26. +15 −9 gradle.properties
  27. +0 −93 gradle/bintray.gradle
  28. +7 −2 gradle/checkstyle.gradle
  29. +0 −25 gradle/idea.gradle
  30. +6 −1 gradle/jacoco.gradle
  31. +61 −23 gradle/java.gradle
  32. +77 −91 gradle/maven.gradle
  33. +6 −1 gradle/mkdocs.gradle
  34. +6 −1 gradle/spotbugs.gradle
  35. BIN gradle/wrapper/gradle-wrapper.jar
  36. +1 −1 gradle/wrapper/gradle-wrapper.properties
  37. +8 −2 gradlew
  38. +10 −19 gradlew.bat
  39. +6 −1 settings.gradle
  40. +13 −3 src/main/java/io/pravega/connectors/flink/AbstractReaderBuilder.java
  41. +12 −7 src/main/java/io/pravega/connectors/flink/AbstractStreamingReaderBuilder.java
  42. +8 −2 src/main/java/io/pravega/connectors/flink/AbstractStreamingWriterBuilder.java
  43. +11 −3 src/main/java/io/pravega/connectors/flink/AbstractWriterBuilder.java
  44. +7 −1 src/main/java/io/pravega/connectors/flink/CheckpointSerializer.java
  45. +133 −0 src/main/java/io/pravega/connectors/flink/EventTimeOrderingFunction.java
  46. +0 −210 src/main/java/io/pravega/connectors/flink/EventTimeOrderingOperator.java
  47. +0 −63 src/main/java/io/pravega/connectors/flink/FlinkPravegaBatchTableSourceSinkFactory.java
  48. +16 −11 src/main/java/io/pravega/connectors/flink/FlinkPravegaInputFormat.java
  49. +12 −7 src/main/java/io/pravega/connectors/flink/FlinkPravegaOutputFormat.java
  50. +98 −66 src/main/java/io/pravega/connectors/flink/FlinkPravegaReader.java
  51. +0 −67 src/main/java/io/pravega/connectors/flink/FlinkPravegaStreamTableSourceSinkFactory.java
  52. +0 −283 src/main/java/io/pravega/connectors/flink/FlinkPravegaTableFactoryBase.java
  53. +0 −145 src/main/java/io/pravega/connectors/flink/FlinkPravegaTableSink.java
  54. +0 −163 src/main/java/io/pravega/connectors/flink/FlinkPravegaTableSource.java
  55. +61 −10 src/main/java/io/pravega/connectors/flink/FlinkPravegaWriter.java
  56. +64 −0 src/main/java/io/pravega/connectors/flink/PravegaCollector.java
  57. +25 −3 src/main/java/io/pravega/connectors/flink/PravegaConfig.java
  58. +7 −1 src/main/java/io/pravega/connectors/flink/PravegaEventRouter.java
  59. +8 −2 src/main/java/io/pravega/connectors/flink/PravegaInputSplit.java
  60. +7 −1 src/main/java/io/pravega/connectors/flink/PravegaWriterMode.java
  61. +22 −10 src/main/java/io/pravega/connectors/flink/ReaderCheckpointHook.java
  62. +147 −0 ...main/java/io/pravega/connectors/flink/dynamic/table/FlinkPravegaDynamicDeserializationSchema.java
  63. +26 −21 src/main/java/io/pravega/connectors/flink/dynamic/table/FlinkPravegaDynamicTableFactory.java
  64. +24 −12 src/main/java/io/pravega/connectors/flink/dynamic/table/FlinkPravegaDynamicTableSink.java
  65. +157 −19 src/main/java/io/pravega/connectors/flink/dynamic/table/FlinkPravegaDynamicTableSource.java
  66. +8 −233 src/main/java/io/pravega/connectors/flink/dynamic/table/PravegaOptions.java
  67. +275 −0 src/main/java/io/pravega/connectors/flink/dynamic/table/PravegaOptionsUtil.java
  68. +163 −0 src/main/java/io/pravega/connectors/flink/formats/registry/PravegaRegistryFormatFactory.java
  69. +66 −0 src/main/java/io/pravega/connectors/flink/formats/registry/PravegaRegistryOptions.java
  70. +47 −0 src/main/java/io/pravega/connectors/flink/formats/registry/PravegaRegistryOptionsUtil.java
  71. +262 −0 ...ava/io/pravega/connectors/flink/formats/registry/PravegaRegistryRowDataDeserializationSchema.java
  72. +245 −0 .../java/io/pravega/connectors/flink/formats/registry/PravegaRegistryRowDataSerializationSchema.java
  73. +16 −10 src/main/java/io/pravega/connectors/flink/serialization/DeserializerFromSchemaRegistry.java
  74. +8 −2 src/main/java/io/pravega/connectors/flink/serialization/JsonSerializer.java
  75. +12 −21 src/main/java/io/pravega/connectors/flink/serialization/PravegaDeserializationSchema.java
  76. +50 −0 ...main/java/io/pravega/connectors/flink/serialization/PravegaDeserializationSchemaWithMetadata.java
  77. +12 −10 src/main/java/io/pravega/connectors/flink/serialization/PravegaSerializationSchema.java
  78. +7 −1 src/main/java/io/pravega/connectors/flink/serialization/WrappingSerializer.java
  79. +489 −0 src/main/java/io/pravega/connectors/flink/table/catalog/pravega/PravegaCatalog.java
  80. +140 −0 src/main/java/io/pravega/connectors/flink/table/catalog/pravega/factories/PravegaCatalogFactory.java
  81. +59 −0 ...ava/io/pravega/connectors/flink/table/catalog/pravega/factories/PravegaCatalogFactoryOptions.java
  82. +163 −0 src/main/java/io/pravega/connectors/flink/table/catalog/pravega/util/PravegaSchemaUtils.java
  83. +0 −404 src/main/java/io/pravega/connectors/flink/table/descriptors/Pravega.java
  84. +0 −92 src/main/java/io/pravega/connectors/flink/table/descriptors/PravegaValidator.java
  85. +0 −236 src/main/java/io/pravega/connectors/flink/util/ConnectorConfigurations.java
  86. +45 −23 src/main/java/io/pravega/connectors/flink/util/FlinkPravegaUtils.java
  87. +7 −1 src/main/java/io/pravega/connectors/flink/util/PravegaEventRouterKeySelector.java
  88. +20 −3 src/main/java/io/pravega/connectors/flink/util/SchemaRegistryUtils.java
  89. +43 −3 src/main/java/io/pravega/connectors/flink/util/StreamWithBoundaries.java
  90. +7 −1 src/main/java/io/pravega/connectors/flink/watermark/AssignerWithTimeWindows.java
  91. +7 −1 src/main/java/io/pravega/connectors/flink/watermark/LowerBoundAssigner.java
  92. +107 −0 src/main/python/pravega_config.py
  93. +199 −0 src/main/python/pravega_reader.py
  94. +125 −0 src/main/python/pravega_writer.py
  95. +12 −3 src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory
  96. +0 −12 src/main/resources/META-INF/services/org.apache.flink.table.factories.TableFactory
  97. +33 −24 ...ravega/connectors/flink/{EventTimeOrderingOperatorTest.java → EventTimeOrderingFunctionTest.java}
  98. +7 −3 src/test/java/io/pravega/connectors/flink/FlinkPravegaInputFormatITCase.java
  99. +8 −2 src/test/java/io/pravega/connectors/flink/FlinkPravegaInputFormatTest.java
  100. +7 −3 src/test/java/io/pravega/connectors/flink/FlinkPravegaOutputFormatITCase.java
  101. +8 −2 src/test/java/io/pravega/connectors/flink/FlinkPravegaOutputFormatTest.java
  102. +9 −6 src/test/java/io/pravega/connectors/flink/FlinkPravegaReaderITCase.java
  103. +11 −7 src/test/java/io/pravega/connectors/flink/FlinkPravegaReaderRGStateITCase.java
  104. +10 −5 src/test/java/io/pravega/connectors/flink/FlinkPravegaReaderSavepointITCase.java
  105. +99 −97 src/test/java/io/pravega/connectors/flink/FlinkPravegaReaderTest.java
  106. +44 −14 src/test/java/io/pravega/connectors/flink/FlinkPravegaSchemaRegistryReaderTestITCase.java
  107. +0 −336 src/test/java/io/pravega/connectors/flink/FlinkPravegaTableFactoryTest.java
  108. +0 −406 src/test/java/io/pravega/connectors/flink/FlinkPravegaTableITCase.java
  109. +0 −182 src/test/java/io/pravega/connectors/flink/FlinkPravegaTableSinkTest.java
  110. +0 −202 src/test/java/io/pravega/connectors/flink/FlinkPravegaTableSourceTest.java
  111. +7 −1 src/test/java/io/pravega/connectors/flink/FlinkPravegaTransactionStateSerializerTest.java
  112. +158 −210 src/test/java/io/pravega/connectors/flink/FlinkPravegaWriterITCase.java
  113. +56 −3 src/test/java/io/pravega/connectors/flink/FlinkPravegaWriterTest.java
  114. +10 −8 src/test/java/io/pravega/connectors/flink/FlinkSerializerWrapperTest.java
  115. +0 −521 src/test/java/io/pravega/connectors/flink/FlinkTableITCase.java
  116. +337 −0 src/test/java/io/pravega/connectors/flink/PravegaCatalogITCase.java
  117. +8 −2 src/test/java/io/pravega/connectors/flink/PravegaConfigTest.java
  118. +11 −5 src/test/java/io/pravega/connectors/flink/PravegaInputSplitTest.java
  119. +17 −10 src/test/java/io/pravega/connectors/flink/ReaderCheckpointHookTest.java
  120. +183 −71 src/test/java/io/pravega/connectors/flink/dynamic/table/FlinkPravegaDynamicTableFactoryTest.java
  121. +386 −32 src/test/java/io/pravega/connectors/flink/dynamic/table/FlinkPravegaDynamicTableITCase.java
  122. +180 −0 src/test/java/io/pravega/connectors/flink/formats/registry/PravegaRegistryFormatFactoryTest.java
  123. +432 −0 src/test/java/io/pravega/connectors/flink/formats/registry/PravegaRegistrySeDeITCase.java
  124. +24 −1 src/test/java/io/pravega/connectors/flink/serialization/PravegaSerializationTest.java
  125. +8 −1 src/test/java/io/pravega/connectors/flink/util/FlinkPravegaUtilsTest.java
  126. +7 −2 src/test/java/io/pravega/connectors/flink/utils/DirectExecutorService.java
  127. +11 −4 src/test/java/io/pravega/connectors/flink/utils/FailingMapper.java
  128. +12 −7 src/test/java/io/pravega/connectors/flink/utils/IntSequenceExactlyOnceValidator.java
  129. +7 −1 src/test/java/io/pravega/connectors/flink/utils/IntegerDeserializationSchema.java
  130. +0 −83 src/test/java/io/pravega/connectors/flink/utils/IntegerGeneratingSource.java
  131. +7 −1 src/test/java/io/pravega/connectors/flink/utils/IntegerSerializationSchema.java
  132. +7 −1 src/test/java/io/pravega/connectors/flink/utils/IntegerSerializer.java
  133. +7 −1 src/test/java/io/pravega/connectors/flink/utils/IntegerWithEventPointer.java
  134. +7 −1 src/test/java/io/pravega/connectors/flink/utils/IntentionalException.java
  135. +7 −1 src/test/java/io/pravega/connectors/flink/utils/NotifyingMapper.java
  136. +7 −1 src/test/java/io/pravega/connectors/flink/utils/SchemaRegistryUtils.java
  137. +66 −62 src/test/java/io/pravega/connectors/flink/utils/SetupUtils.java
  138. +7 −1 src/test/java/io/pravega/connectors/flink/utils/StreamSinkOperatorTestHarness.java
  139. +10 −3 src/test/java/io/pravega/connectors/flink/utils/StreamSourceOperatorTestHarness.java
  140. +7 −1 src/test/java/io/pravega/connectors/flink/utils/SuccessException.java
  141. +28 −1 src/test/java/io/pravega/connectors/flink/utils/TestUtils.java
  142. +12 −5 src/test/java/io/pravega/connectors/flink/{ → utils}/ThrottledIntegerGeneratingSource.java
  143. +7 −1 src/test/java/io/pravega/connectors/flink/utils/ThrottledIntegerWriter.java
  144. +360 −0 src/test/java/io/pravega/connectors/flink/utils/User.java
  145. +24 −0 src/test/resources/avro/User.avsc
  146. +17 −17 src/test/resources/ca-cert.crt
  147. +28 −28 src/test/resources/ca-key.key
  148. BIN src/test/resources/client.truststore.jks
  149. +16 −0 src/test/resources/debezium-data-schema-exclude.txt
  150. +41 −0 src/test/resources/log4j2-test.properties
  151. +0 −34 src/test/resources/logback.xml
  152. +17 −16 src/test/resources/server-cert.crt
  153. +95 −88 src/test/resources/server-key.key
  154. BIN src/test/resources/server.keystore.jks
8 changes: 7 additions & 1 deletion .codecov.yml
@@ -1,12 +1,18 @@
#
# Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
# Copyright Pravega Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
coverage:
status:
project:
44 changes: 34 additions & 10 deletions .github/workflows/{build-artifacts.yml → build.yml}
@@ -1,4 +1,4 @@
name: build-artifacts
name: build

on: [push, pull_request, workflow_dispatch]
# workflow_dispatch should make manually triggered ci/cd possible
@@ -11,9 +11,13 @@ env:

jobs:
build:
name: Build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
# pull all the commits, so the commit count of the artifact name will be correct
fetch-depth: 0

- name: Set up JDK 11
uses: actions/setup-java@v1
@@ -25,7 +29,7 @@ jobs:
- name: Cache gradle modules
uses: actions/cache@v2
with:
# gradle packages need to be cached
# gradle packages that need to be cached
path: |
.gradle
$HOME/.gradle
@@ -35,18 +39,24 @@ jobs:
# and we will rerun the download to get the newest packages
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}

# build cache is used when we need to publish the artifact
- name: Cache build outputs
uses: actions/cache@v2
with:
path: ./*
key: ${{ github.run_id }}

- name: Grant execute permission for gradlew
run: chmod +x gradlew

- name: Build via Gradle
run: ./gradlew clean build

- name: Build via Gradle with Scala 2.11
run: |
./gradlew clean build -PflinkScalaVersion=2.11
bash <(curl -s https://codecov.io/bash) -t 9c42ff48-d98f-4444-af05-cf734aa1dbd0
- name: Report to Codecov
run: bash <(curl -s https://codecov.io/bash) -t 9c42ff48-d98f-4444-af05-cf734aa1dbd0

snapshot:
name: Publish snapshot to Github Packages
needs: [build]
runs-on: ubuntu-latest
# only publish the snapshot when it is a push on the master or the release branch (starts with r0.x or r1.x)
@@ -55,8 +65,22 @@ jobs:
BINTRAY_USER: ${{ secrets.BINTRAY_USER }}
BINTRAY_KEY: ${{ secrets.BINTRAY_KEY }}
steps:
- name: Publish to repo
run: ./gradlew publishToRepo -PpublishUrl=jcenterSnapshot -PpublishUsername=$BINTRAY_USER -PpublishPassword=$BINTRAY_KEY
# gradle packages that need to be cached
- name: Cache gradle modules
uses: actions/cache@v2
with:
path: |
.gradle
$HOME/.gradle
$HOME/.m2
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}

# publish the artifact from the build cache
- name: Cache build outputs
uses: actions/cache@v2
with:
path: ./*
key: ${{ github.run_id }}

- name: Publish to repo with Scala 2.11
run: ./gradlew publishToRepo -PpublishUrl=jcenterSnapshot -PpublishUsername=$BINTRAY_USER -PpublishPassword=$BINTRAY_KEY -PflinkScalaVersion=2.11
- name: Publish to Github Packages
run: ./gradlew publish -PpublishUrl=https://maven.pkg.github.com/${{github.repository}} -PpublishUsername=${{github.actor}} -PpublishPassword=${{secrets.GITHUB_TOKEN}}
2 changes: 2 additions & 0 deletions .gitignore
@@ -16,3 +16,5 @@ log/
.metadata/
.recommenders/
*.log
*.pyc
__pycache__/
19 changes: 12 additions & 7 deletions HEADER
@@ -1,8 +1,13 @@
Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Copyright Pravega Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0


Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
3 changes: 2 additions & 1 deletion NOTICE
@@ -1 +1,2 @@
Copyright (c) 2017-2021 Dell Inc., or its subsidiaries. All Rights Reserved.
Copyright (c) 2021 Pravega Authors.
Copyright (c) 2017-2021 Dell Inc., or its subsidiaries. All Rights Reserved.
63 changes: 44 additions & 19 deletions README.md
@@ -1,46 +1,71 @@
<!--
Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
Copyright Pravega Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Pravega Flink Connectors [![Build Status](https://travis-ci.org/pravega/flink-connectors.svg?branch=master)](https://travis-ci.org/pravega/flink-connectors)

# Pravega Flink Connectors

[![Build Status](https://img.shields.io/github/workflow/status/pravega/flink-connectors/build)](https://github.com/pravega/flink-connectors/actions/workflows/build.yml?query=branch%3Amaster) [![License](https://img.shields.io/github/license/pravega/flink-connectors)](https://github.com/pravega/flink-connectors/blob/master/LICENSE) [![Downloads](https://img.shields.io/github/downloads/pravega/flink-connectors/total)](https://github.com/pravega/flink-connectors/releases) [![Codecov](https://img.shields.io/codecov/c/github/pravega/flink-connectors)](https://app.codecov.io/gh/pravega/flink-connectors/)

This repository implements connectors to read and write [Pravega](http://pravega.io/) Streams with the [Apache Flink](http://flink.apache.org/) stream processing framework.

The connectors can be used to build end-to-end stream processing pipelines (see [Samples](https://github.com/pravega/pravega-samples)) that use Pravega as the stream storage and message bus, and Apache Flink for computation over the streams.


## Features & Highlights

- **Exactly-once processing guarantees** for both Reader and Writer, supporting **end-to-end exactly-once processing pipelines**

- Seamless integration with Flink's checkpoints and savepoints.

- Parallel Readers and Writers supporting high throughput and low latency processing.

- Table API support to access Pravega Streams for both **Batch** and **Streaming** use case.
- **Exactly-once processing guarantees** for both Reader and Writer, supporting **end-to-end exactly-once processing pipelines**
- Seamless integration with Flink's checkpoints and savepoints.
- Parallel Readers and Writers supporting high throughput and low latency processing.
- Table API support to access Pravega Streams for both **Batch** and **Streaming** use cases.

## Compatibility Matrix

The [master](https://github.com/pravega/flink-connectors) branch will always have the most recent
supported versions of Flink and Pravega.

| Git Branch | Pravega Version | Java Version To Build Connector | Java Version To Run Connector | Flink Version | Status | Artifact Link |
|-------------------------------------------------------------------------------------|------|---------|--------------|------|-------------------|----------------------------------------------------------------------------------------|
| [master](https://github.com/pravega/flink-connectors) | 0.10 | Java 11 | Java 8 or 11 | 1.12 | Under Development | http://oss.jfrog.org/jfrog-dependencies/io/pravega/pravega-connectors-flink-1.12_2.12/ |
| [r0.10-flink1.11](https://github.com/pravega/flink-connectors/tree/r0.10-flink1.11) | 0.10 | Java 11 | Java 8 or 11 | 1.11 | Under Development | http://oss.jfrog.org/jfrog-dependencies/io/pravega/pravega-connectors-flink-1.11_2.12/ |
| [r0.10-flink1.10](https://github.com/pravega/flink-connectors/tree/r0.10-flink1.10) | 0.10 | Java 11 | Java 8 or 11 | 1.10 | Under Development | http://oss.jfrog.org/jfrog-dependencies/io/pravega/pravega-connectors-flink-1.10_2.12/ |
| [r0.9](https://github.com/pravega/flink-connectors/tree/r0.9) | 0.9 | Java 11 | Java 8 or 11 | 1.11 | Released | https://repo1.maven.org/maven2/io/pravega/pravega-connectors-flink-1.11_2.12/0.9.0/ |
| [r0.9-flink1.10](https://github.com/pravega/flink-connectors/tree/r0.9-flink1.10) | 0.9 | Java 11 | Java 8 or 11 | 1.10 | Released | https://repo1.maven.org/maven2/io/pravega/pravega-connectors-flink-1.10_2.12/0.9.0/ |
| [r0.9-flink1.9](https://github.com/pravega/flink-connectors/tree/r0.9-flink1.9) | 0.9 | Java 11 | Java 8 or 11 | 1.9 | Released | https://repo1.maven.org/maven2/io/pravega/pravega-connectors-flink-1.9_2.12/0.9.0/ |
| [master](https://github.com/pravega/flink-connectors) | 0.10 | Java 11 | Java 8 or 11 | 1.13 | Under Development | https://github.com/pravega/flink-connectors/packages/910737 |
| [r0.10-flink1.12](https://github.com/pravega/flink-connectors/tree/r0.10-flink1.12) | 0.10 | Java 11 | Java 8 or 11 | 1.12 | Under Development | https://github.com/pravega/flink-connectors/packages/887087 |
| [r0.10-flink1.11](https://github.com/pravega/flink-connectors/tree/r0.10-flink1.11) | 0.10 | Java 11 | Java 8 or 11 | 1.11 | Under Development | https://github.com/pravega/flink-connectors/packages/904171 |
| [r0.9](https://github.com/pravega/flink-connectors/tree/r0.9) | 0.9 | Java 11 | Java 8 or 11 | 1.11 | Released | https://repo1.maven.org/maven2/io/pravega/pravega-connectors-flink-1.11_2.12/0.9.1/ |
| [r0.9-flink1.10](https://github.com/pravega/flink-connectors/tree/r0.9-flink1.10) | 0.9 | Java 11 | Java 8 or 11 | 1.10 | Released | https://repo1.maven.org/maven2/io/pravega/pravega-connectors-flink-1.10_2.12/0.9.1/ |
| [r0.9-flink1.9](https://github.com/pravega/flink-connectors/tree/r0.9-flink1.9) | 0.9 | Java 11 | Java 8 or 11 | 1.9 | Released | https://repo1.maven.org/maven2/io/pravega/pravega-connectors-flink-1.9_2.12/0.9.1/ |

## How to build

Building the connectors from the source is only necessary when we want to use or contribute to the latest (unreleased) version of the Pravega Flink connectors.

The connector project is linked to a specific version of Pravega, based on the `pravegaVersion` field in `gradle.properties`.

Check out the source code repository with the following steps:

```git clone https://github.com/pravega/flink-connectors.git```

After cloning the repository, the project can be built (excluding tests) by running the command below in the project root directory `flink-connectors`.

```./gradlew clean build -x test```

## How to use

Check out the documents [here](https://github.com/pravega/flink-connectors/blob/master/documentation/src/docs/dev-guide.md) to learn how to build your own applications using the Flink connector for Pravega.

More examples of how to use the connectors with Flink applications can be found in the [Pravega Samples](https://github.com/pravega/pravega-samples/tree/master/flink-connector-examples) repository.

## Documentation
To learn more about how to build and use the Flink Connector library, follow the connector documentation [here](http://pravega.io/).
## Support

More examples on how to use the connectors with Flink application can be found in [Pravega Samples](https://github.com/pravega/pravega-samples) repository.
Don't hesitate to ask! Contact the developers and community on [Slack](https://pravega-io.slack.com/) ([signup](https://pravega-slack-invite.herokuapp.com/)) if you need any help. If you find a bug, open an issue on [Github Issues](https://github.com/pravega/flink-connectors/issues).

## About

132 changes: 40 additions & 92 deletions build.gradle
@@ -1,27 +1,32 @@
/**
* Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

buildscript {
repositories {
jcenter()
mavenCentral()
maven {
url "https://plugins.gradle.org/m2/"
}
}
dependencies {
classpath group: 'com.github.jengelman.gradle.plugins', name:'shadow', version: shadowGradlePlugin
classpath "gradle.plugin.com.github.jengelman.gradle.plugins:shadow:${shadowGradlePlugin}"
classpath "com.jfrog.bintray.gradle:gradle-bintray-plugin:${bintrayPluginVersion}"
classpath group: 'org.hidetake', name: 'gradle-ssh-plugin', version: gradleSshPluginVersion
classpath "ru.vyarus:gradle-mkdocs-plugin:${gradleMkdocsPluginVersion}"
classpath "org.ajoberstar.grgit:grgit-gradle:${gradleGitPluginVersion}"
classpath "gradle.plugin.com.github.spotbugs.snom:spotbugs-gradle-plugin:${spotbugsPluginVersion}"
}
}
@@ -30,13 +35,11 @@ plugins {
}
apply plugin: 'eclipse'
apply plugin: 'org.ajoberstar.grgit'
apply from: 'gradle/idea.gradle'
apply from: 'gradle/java.gradle'
apply from: 'gradle/checkstyle.gradle'
apply from: 'gradle/spotbugs.gradle'
apply from: 'gradle/jacoco.gradle'
apply from: 'gradle/maven.gradle'
apply from: 'gradle/bintray.gradle'
apply from: 'gradle/mkdocs.gradle'

gradle.projectsEvaluated {
@@ -50,18 +53,21 @@ group = "io.pravega"
version = getProjectVersion()

repositories {
mavenLocal()
mavenCentral()
maven {
url "https://oss.jfrog.org/jfrog-dependencies"
url "https://maven.pkg.github.com/pravega/pravega"
credentials {
username = "pravega-public"
password = "\u0067\u0068\u0070\u005F\u0048\u0034\u0046\u0079\u0047\u005A\u0031\u006B\u0056\u0030\u0051\u0070\u006B\u0079\u0058\u006D\u0035\u0063\u0034\u0055\u0033\u006E\u0032\u0065\u0078\u0039\u0032\u0046\u006E\u0071\u0033\u0053\u0046\u0076\u005A\u0049"
}
}
if (findProperty("repositoryUrl")) {
maven {
url findProperty("repositoryUrl")
allowInsecureProtocol = true
}
}
else {
jcenter()
maven {
url "https://repository.apache.org/snapshots"
}
@@ -79,87 +85,56 @@ configurations.all {
}

configurations {
shadowOnly {
}
testCompile.extendsFrom(compileOnly)
testCompile.exclude group: 'org.slf4j', module: 'slf4j-log4j12'
testCompile.exclude group: 'log4j', module: 'log4j'
}

test {
testLogging {
onOutput { descriptor, event ->
if (project.hasProperty('logOutput')) {
events = ["passed", "failed", "skipped"]
showStandardStreams = true
logger.lifecycle(event.message)
}
}
}
testImplementation.extendsFrom(compileOnly)
testImplementation.exclude group: 'org.slf4j', module: 'slf4j-log4j12'
testImplementation.exclude group: 'log4j', module: 'log4j'
}

dependencies {
compileOnly group: 'com.github.spotbugs', name: 'spotbugs-annotations', version: spotbugsAnnotationsVersion

compile(group: 'io.pravega', name: 'pravega-client', version: pravegaVersion) {
exclude group: 'org.slf4j', module: 'slf4j-api'
api (group: 'io.pravega', name: 'pravega-client', version: pravegaVersion) {
exclude group: 'org.slf4j', module: 'slf4j-api'
}

// provided by application if needed
compileOnly group: 'io.pravega', name: 'schemaregistry-serializers', classifier: 'all', version: schemaRegistryVersion

compileOnly group: 'org.slf4j', name: 'slf4j-api', version: slf4jApiVersion // provided by flink-runtime
compile group: 'org.apache.commons', name: 'commons-lang3', version: apacheCommonsVersion
compileOnly group: 'org.projectlombok', name: 'lombok', version: lombokVersion // not needed at runtime
annotationProcessor 'org.projectlombok:lombok:' + lombokVersion
compileOnly group: 'org.apache.flink', name: 'flink-streaming-java_'+flinkScalaVersion, version: flinkVersion // provided by application

compileOnly group: 'org.apache.flink', name: 'flink-table-planner_'+flinkScalaVersion, version: flinkVersion // provided by application
compileOnly group: 'org.apache.flink', name: 'flink-table-planner-blink_'+flinkScalaVersion, version: flinkVersion // provided by application
compileOnly group: 'org.apache.flink', name: 'flink-table-api-java-bridge_'+flinkScalaVersion, version: flinkVersion // provided by application

testAnnotationProcessor 'org.projectlombok:lombok:' + lombokVersion
testCompile (group: 'io.pravega', name: 'pravega-standalone', version: pravegaVersion) {
compileOnly group: 'org.apache.flink', name: 'flink-json', version: flinkVersion
compileOnly group: 'org.apache.flink', name: 'flink-avro', version: flinkVersion

testImplementation (group: 'io.pravega', name: 'pravega-standalone', version: pravegaVersion) {
exclude group: 'org.slf4j', module: 'slf4j-api'
exclude group: 'ch.qos.logback', module: 'logback-classic'
exclude group: 'org.apache.zookeeper', module: 'zookeeper'
}
testCompile (group: 'io.pravega', name: 'schemaregistry-server', version: schemaRegistryVersion) {
testImplementation group: 'org.apache.zookeeper', name: 'zookeeper', version: '3.5.9'
testImplementation (group: 'io.pravega', name: 'schemaregistry-server', version: schemaRegistryVersion) {
transitive = false
}
testCompile group: 'org.mockito', name: 'mockito-core', version: mockitoVersion
testCompile group: 'org.apache.flink', name: 'flink-core', classifier: 'tests', version: flinkVersion
testCompile group: 'org.apache.flink', name: 'flink-tests', version: flinkVersion
testCompile group: 'org.apache.flink', name: 'flink-test-utils_'+flinkScalaVersion, version: flinkVersion
testCompile group: 'org.apache.flink', name: 'flink-runtime_'+flinkScalaVersion, classifier: 'tests', version: flinkVersion
testCompile group: 'org.apache.flink', name: 'flink-table-common', classifier: 'tests', version: flinkVersion
testCompile group: 'org.apache.flink', name: 'flink-streaming-java_'+flinkScalaVersion, classifier: 'tests', version: flinkVersion
testCompile group: 'org.apache.flink', name: 'flink-json', version: flinkVersion
testCompile group: 'org.apache.flink', name: 'flink-avro', version: flinkVersion

// configuring the shaded pom dependencies
shadowOnly group: 'org.slf4j', name: 'slf4j-api', version: slf4jApiVersion
shadowOnly group: 'org.apache.flink', name: 'flink-streaming-java_'+flinkScalaVersion, version: flinkVersion
}

shadowJar {
// relocate pravega client's dependencies to minimize conflicts
relocate "org.apache.commons", "io.pravega.shaded.org.apache.commons"
relocate "com.google", "io.pravega.shaded.com.google"
relocate "io.grpc", "io.pravega.shaded.io.grpc"
relocate "com.squareup.okhttp", "io.pravega.shaded.com.squareup.okhttp"
relocate "okio", "io.pravega.shaded.okio"
relocate "io.opencensus", "io.pravega.shaded.io.opencensus"
relocate "io.netty", "io.pravega.shaded.io.netty"
relocate 'META-INF/native/libnetty', 'META-INF/native/libio_pravega_shaded_netty'
relocate 'META-INF/native/netty', 'META-INF/native/io_pravega_shaded_netty'

classifier = null
mergeServiceFiles()
testImplementation group: 'org.mockito', name: 'mockito-core', version: mockitoVersion
testImplementation group: 'org.apache.flink', name: 'flink-core', classifier: 'tests', version: flinkVersion
testImplementation group: 'org.apache.flink', name: 'flink-tests', classifier: 'tests', version: flinkVersion
testImplementation group: 'org.apache.flink', name: 'flink-test-utils_'+flinkScalaVersion, version: flinkVersion
testImplementation group: 'org.apache.flink', name: 'flink-runtime', classifier: 'tests', version: flinkVersion
testImplementation group: 'org.apache.flink', name: 'flink-table-common', classifier: 'tests', version: flinkVersion
testImplementation group: 'org.apache.flink', name: 'flink-streaming-java_'+flinkScalaVersion, classifier: 'tests', version: flinkVersion
testImplementation group: 'org.apache.flink', name: 'flink-table-planner_'+flinkScalaVersion, classifier: 'tests', version: flinkVersion
testImplementation group: 'org.apache.flink', name: 'flink-json', version: flinkVersion
testImplementation group: 'org.apache.flink', name: 'flink-avro', version: flinkVersion
testImplementation group: 'org.hamcrest', name: 'hamcrest', version: hamcrestVersion
}

javadoc {
title = "Pravega Flink Connector"
failOnError = false
exclude "**/impl/**";
exclude "**/impl/**"
}

def getProjectVersion() {
@@ -173,34 +148,7 @@ def getProjectVersion() {
return ver
}

distributions {
release {
contents {
from shadowJar
from(project.configurations.shadow)
from javadocJar
from sourceJar
}
}
workspace {
contents {
from ('.') {
exclude "build"
exclude ".gradle"
exclude ".idea"
exclude "out"
exclude "pravega/*"
}
}
}
}

task distribution(type: Copy, dependsOn: [installReleaseDist, installWorkspaceDist, assembleWorkspaceDist]) {
from ("$buildDir/install/flink-connectors-release")
into ("$buildDir/distributions")
}

//allow system properties to be passed to the test program
// allow system properties to be passed to the test program
// for e.g., to use standalone Pravega for running the system test "./gradlew clean build -Dpravega.uri=tcp://localhost:9090"
test {
systemProperties = System.properties
10 changes: 9 additions & 1 deletion checkstyle/checkstyle.xml
@@ -9,7 +9,7 @@

<!-- header -->
<module name="RegexpHeader">
<property name="header" value="/\*\*\n *"/>
<property name="header" value="/\*\*\n * Copyright Pravega Authors\."/>
</module>
<module name="SuppressionFilter">
<property name="file" value="${config_loc}/suppressions.xml"/>
@@ -85,6 +85,14 @@
<module name="ImportControl">
<property name="file" value="${importControlFile}"/>
</module>
<module name="CustomImportOrder">
<property name="sortImportsInGroupAlphabetically" value="true"/>
<property name="customImportOrderRules"
value="THIRD_PARTY_PACKAGE###SPECIAL_IMPORTS###STANDARD_JAVA_PACKAGE###STATIC"/>
<property name="specialImportsRegExp" value="^javax\."/>
<property name="standardPackageRegExp" value="^java\."/>
<property name="separateLineBetweenGroups" value="false"/>
</module>

<!-- whitespace -->
<module name="GenericWhitespace"/>
2 changes: 1 addition & 1 deletion checkstyle/import-control.xml
@@ -12,8 +12,8 @@
<allow pkg="org.slf4j"/>
<allow pkg="org.junit"/>
<allow pkg="com.google"/>
<allow pkg="io.grpc"/>
<allow pkg="io.pravega"/>
<allow pkg="lombok"/>
<allow pkg="org.apache"/>
<allow pkg="edu.umd.cs.findbugs"/>

8 changes: 7 additions & 1 deletion checkstyle/spotbugs-exclude.xml
@@ -1,12 +1,18 @@
<!--
* Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
-->
<FindBugsFilter>
<Match> <!-- does not work well with futures -->
7 changes: 6 additions & 1 deletion checkstyle/spotbugs-include.xml
@@ -1,12 +1,17 @@
<!--
* Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
-->
<FindBugsFilter>
<!-- Probable bug - an apparent coding mistake resulting in code that was
2 changes: 1 addition & 1 deletion checkstyle/suppressions.xml
@@ -5,7 +5,7 @@
"http://www.puppycrawl.com/dtds/suppressions_1_1.dtd">

<!--~
Copyright (c) 2017 Dell Inc., or its subsidiaries.
Copyright Pravega Authors.
-->

<suppressions>
45 changes: 31 additions & 14 deletions documentation/src/docs/batch.md
@@ -3,32 +3,40 @@ title: Batch
---

<!--
Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
Copyright Pravega Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

The Flink Connector library for Pravega makes it possible to use a Pravega Stream as a data source and data sink in a batch program. See the sections below for details.

## Table of Contents

- [FlinkPravegaInputFormat](#flinkpravegainputformat)
- [Parameters](#parameters)
- [Input Stream(s)](#input-streams)
- [StreamCuts](#streamcuts)
- [Parallelism](#parallelism)
- [Parameters](#parameters)
- [Input Stream(s)](#input-streams)
- [StreamCuts](#streamcuts)
- [Parallelism](#parallelism)

- [FlinkPravegaOutputFormat](#flinkpravegaoutputformat)
- [Parameters](#parameters)
- [Output Stream](#output-stream)
- [Parallelism](#parallelism)
- [Event Routing](#event-routing)
- [Parameters](#parameters)
- [Output Stream](#output-stream)
- [Parallelism](#parallelism)
- [Event Routing](#event-routing)
- [Serialization](#serialization)

## FlinkPravegaInputFormat

A Pravega Stream may be used as a data source within a Flink batch program using an instance of
`io.pravega.connectors.flink.FlinkPravegaInputFormat`. The input format reads events of a stream as a [`DataSet`](https://ci.apache.org/projects/flink/flink-docs-master/api/java/org/apache/flink/api/java/DataSet.html) (the basic abstraction of the Flink Batch API). This input format opens the stream for batch reading, which processes stream segments in **parallel** and does not follow routing key order.

@@ -57,6 +65,7 @@ DataSource<EventType> dataSet = env.createInput(inputFormat, TypeInformation.of(
```

### Parameters

A builder API is provided to construct an instance of `FlinkPravegaInputFormat`. See the table below for a summary of builder properties. Note that the builder accepts an instance of `PravegaConfig` for common configuration properties. See the [configurations](configurations.md) page for more information.

|Method |Description|
@@ -66,11 +75,13 @@ A builder API is provided to construct an instance of `FlinkPravegaInputFormat`.
|`withDeserializationSchema`|The deserialization schema which describes how to turn byte messages into events.|

### Input Stream(s)

Each Pravega stream exists within a scope. A scope defines a namespace for streams such that names are unique. Across scopes, streams can have the same name. For example, if we have scopes `A` and `B`, then we can have a stream called `myStream` in each one of them. We cannot have a stream with the same name in the same scope. The builder API accepts both **qualified** and **unqualified** stream names.

- In qualified stream names, the scope is explicitly specified, e.g. `my-scope/my-stream`.
- In unqualified stream names are assumed to refer to the default scope as set in the `PravegaConfig`.
See the [configurations](configurations.md) page for more information on default scope.
- In qualified stream names, the scope is explicitly specified, e.g. `my-scope/my-stream`.
- Unqualified stream names are assumed to refer to the default scope as set in the `PravegaConfig`.

See the [configurations](configurations.md) page for more information on default scope.

A stream may be specified in one of three ways:

@@ -87,9 +98,11 @@ A `StreamCut` represents a specific position in a Pravega Stream, which may be o
If stream cuts are not provided then the default start position requested is assumed to be the earliest available data in the stream and the default end position is assumed to be all available data in that stream as of when the job execution begins.

### Parallelism

`FlinkPravegaInputFormat` supports parallelization. Use the `setParallelism` method of `DataSet` to configure the number of parallel instances to execute. The parallel instances consume the stream in a coordinated manner, each consuming one or more stream segments.
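
For illustration, here is a minimal sketch that reuses the `env`, `inputFormat` and `EventType` names from the example above (carried over as assumptions from that snippet):

```java
// Read the stream with four parallel instances; the instances coordinate so that
// each one consumes one or more of the stream's segments.
DataSource<EventType> dataSet = env
        .createInput(inputFormat, TypeInformation.of(EventType.class))
        .setParallelism(4);
```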

## FlinkPravegaOutputFormat

A Pravega Stream may be used as a data sink within a Flink batch program using an instance of `io.pravega.connectors.flink.FlinkPravegaOutputFormat`. The `FlinkPravegaOutputFormat` can be supplied as a sink to the [`DataSet`](https://ci.apache.org/projects/flink/flink-docs-master/api/java/org/apache/flink/api/java/DataSet.html#output-org.apache.flink.api.common.io.OutputFormat-) (the basic abstraction of the Flink Batch API).

### Example
@@ -120,6 +133,7 @@ env.execute("...");
```

### Parameter

A builder API is provided to construct an instance of `FlinkPravegaOutputFormat`. See the table below for a summary of builder properties. Note that the builder accepts an instance of `PravegaConfig` for common configuration properties. See the [configurations](configurations.md) page for more information.

|Method |Description|
@@ -133,8 +147,8 @@ A builder API is provided to construct an instance of `FlinkPravegaOutputFormat`

Each stream in Pravega is contained by a scope. A scope acts as a namespace for one or more streams. The builder API accepts both **qualified** and **unqualified** stream names.

- In qualified, the scope is explicitly specified, e.g. `my-scope/my-stream`.
- In Unqualified stream names are assumed to refer to the default scope as set in the `PravegaConfig`.
- In qualified stream names, the scope is explicitly specified, e.g. `my-scope/my-stream`.
- Unqualified stream names are assumed to refer to the default scope as set in the `PravegaConfig`.

A stream may be specified in one of three ways:

@@ -143,12 +157,15 @@ A stream may be specified in one of three ways:
3. As an instance of `io.pravega.client.stream.Stream`, e.g. `Stream.of("my-scope", "my-stream")`.

### Parallelism

`FlinkPravegaWriter` supports parallelization. Use the `setParallelism` method to configure the number of parallel instances to execute.

### Event Routing

Every event written to a Pravega Stream has an associated Routing Key. The Routing Key is the basis for event ordering. See the [Pravega Concepts](http://pravega.io/docs/latest/pravega-concepts/#events) for details.

To establish the routing key for each event, provide an implementation of `io.pravega.connectors.flink.PravegaEventRouter` when constructing the writer.
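
As a minimal sketch, a router can derive the routing key from a field of the event type (the `SensorEvent` type and its `getSensorId()` accessor are illustrative assumptions, not part of the connector API):

```java
// Routes every event by its sensor id, so events from the same sensor
// keep their relative order within the Pravega stream.
public class SensorEventRouter implements PravegaEventRouter<SensorEvent> {
    @Override
    public String getRoutingKey(SensorEvent event) {
        return event.getSensorId();
    }
}
```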

## Serialization

Please see the [serialization](serialization.md) page for more information on how to use the _serializer_ and _deserializer_.
177 changes: 177 additions & 0 deletions documentation/src/docs/catalog.md
@@ -0,0 +1,177 @@
<!--
Copyright Pravega Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

# Pravega Catalogs

## General Catalog Introduction

[Flink Catalogs](https://ci.apache.org/projects/flink/flink-docs-stable/docs/dev/table/catalogs/) provide metadata such as databases, tables, partitions, views, functions and information needed to access data stored in a database or other external systems. A catalog provides a unified API for managing metadata and making it accessible from the Table API and SQL queries.

A Catalog enables users to reference existing metadata in their data systems and automatically maps them to Flink's corresponding metadata. For example, Flink can map JDBC tables to Flink tables automatically, and users don't have to manually re-write DDLs in Flink. A catalog simplifies the steps required to connect Flink to a user's existing systems, improving the user experience.

## Basic ideas and building blocks of Pravega Catalog

Pravega uses terms such as *streams* and *scopes* for managing streaming data, but it does not have the concepts of tables and databases. These terms, however, can be thought of as analogous. For example, if a Pravega stream contains semi-structured data such as JSON, it is feasible to map Pravega streams to Flink tables with the help of a schema registry service.

[Pravega Schema Registry](https://github.com/pravega/schema-registry) is built for such a purpose. It is the registry service built on Pravega that helps store and manage schemas for data stored in Pravega streams. It also provides a factory of methods to standardize serialization, with built-in support for popular serialization formats such as Avro, Protobuf and JSON schemas, as well as custom serialization.

### `PravegaRegistryFormatFactory`

When using Schema Registry serialization, further information is required in order to describe how to map binary data onto table columns. A new table format named `pravega-registry` has been added to define this mapping.

**Note:** The `pravega-registry` format factory should ONLY be used with the `PravegaCatalog`. Currently it supports only the Json and Avro formats, without any additional encryption or compression codecs.

It has the following options:

| Option | Required | Default | Type | Description |
|-----------------------------|---------------------|---------------|--------------|-------------------------------------------------------------------------------------|
| format | required | (none) | String | Specify what format to use, here should be 'pravega-registry' |
| pravega-registry.uri | required | (none) | String | Pravega Schema Registry service URI |
| pravega-registry.namespace | required | (none) | String | Pravega Schema Registry namespace, should be the same name as Pravega scope |
| pravega-registry.group-id | required | (none) | String | Pravega Schema Registry group ID, should be the same name as Pravega stream |
| pravega-registry.format | optional | Avro | String | Default format for serialization in the table sink; valid values are 'Json' and 'Avro' |
| pravega-registry.json.* | optional | (none) | - | Specification for json format, completely inherited from official Flink Json format factory, refer to this [doc](https://ci.apache.org/projects/flink/flink-docs-stable/docs/connectors/table/formats/json/#format-options) for details |

A `PravegaCatalog` is built to manage Pravega streams as Flink tables based on its schema registry and this table format.
It can map all the streams that have a Json/Avro schema registered, and users can directly read from or write to these streams without extra SQL DDL to establish the connection.

## Pravega as a Catalog

The `PravegaCatalog` enables users to connect Flink to Pravega streams. The following table shows the mapping between Flink Catalog and Pravega terms:

| Flink Catalog terms | Pravega terms |
|--------------------------------------|-------------------|
| catalog name (defined in Flink only) | N/A |
| database name | scope name |
| table name | stream name |

Currently `PravegaCatalog` only supports a limited set of `Catalog` methods:

```java
// The supported methods by Pravega Catalog
PravegaCatalog.listDatabases();
PravegaCatalog.getDatabase(String databaseName);
PravegaCatalog.databaseExists(String databaseName);
PravegaCatalog.createDatabase(String name, CatalogDatabase database, boolean ignoreIfExists);
PravegaCatalog.dropDatabase(String name, boolean ignoreIfNotExists, boolean cascade);
PravegaCatalog.listTables(String databaseName);
PravegaCatalog.getTable(ObjectPath tablePath);
PravegaCatalog.tableExists(ObjectPath tablePath);
PravegaCatalog.dropTable(ObjectPath tablePath, boolean ignoreIfNotExists);
PravegaCatalog.createTable(ObjectPath tablePath, CatalogBaseTable table, boolean ignoreIfExists);
```

Only these database and table operations are currently supported. Views/partitions/functions/statistics operations are NOT supported in `PravegaCatalog`.

### Catalog options

Pravega Catalog supports the following options:

- name: required, name of the catalog
- type: required, type of the catalog, which should be 'pravega' here
- controller-uri: required, URI of the Pravega controller to connect to
- schema-registry-uri: required, URI of the Schema Registry service to connect to
- default-database: required, default Pravega scope, which must already be created
- serialization.format: optional, a static serialization format for the catalog; valid values are 'Avro' (default) and 'Json', and this is the format used for all the table sinks in the catalog
- security.auth-type: optional, the static authentication/authorization type for Pravega security
- security.auth-token: optional, the static authentication/authorization token for Pravega security
- security.validate-hostname: optional, flag that decides whether to enable host name validation when TLS is enabled for Pravega
- security.trust-store: optional, trust store for the Pravega client
- json.*: optional, json format specifications for the catalog table sinks, inherited by `PravegaRegistryFormatFactory` for all catalog table sinks

## How to use Pravega Catalog

Users can create and register the Pravega Flink Catalog with SQL DDL or programmatically in Java/Scala.

#### SQL

```sql
CREATE CATALOG pravega_catalog WITH(
'type' = 'pravega',
'default-database' = 'scope1',
'controller-uri' = 'tcp://localhost:9090',
'schema-registry-uri' = 'http://localhost:9092'
);

USE CATALOG pravega_catalog;
```

#### Java

```java
TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.newInstance().build());

String name = "pravega_catalog";
String defaultDatabase = "scope1";
String controllerURI = "tcp://localhost:9090";
String schemaRegistryURI = "http://localhost:9092";

PravegaCatalog catalog = new PravegaCatalog(name, defaultDatabase, controllerURI, schemaRegistryURI);
tableEnv.registerCatalog("pravega_catalog", catalog);

// set the PravegaCatalog as the current catalog of the session
tableEnv.useCatalog("pravega_catalog");
```

#### YAML

```yaml
execution:
...
current-catalog: pravega_catalog # set the PravegaCatalog as the current catalog of the session
current-database: scope1

catalogs:
- name: pravega_catalog
type: pravega
controller-uri: tcp://localhost:9090
schema-registry-uri: http://localhost:9092
default-database: scope1
```
After that, you can operate on Pravega scopes and streams with SQL commands. Here are some examples.
```sql
-- List all the scopes
SHOW DATABASES;

-- Create a scope
CREATE DATABASE scope2 WITH (...);

-- Delete a scope; if 'CASCADE' is appended, all the streams in it will also be dropped.
DROP DATABASE scope2;

-- List all the streams with schema registered in the scope
SHOW TABLES;

-- Create a stream and register the schema
CREATE TABLE mytable (name STRING, age INT) WITH (...);

-- Delete a stream, the schema group will also be deleted
DROP TABLE mytable;

-- Scan/Query the stream `test_table` as a table
SELECT * FROM pravega_catalog.scope1.test_table;
SELECT count(DISTINCT name) FROM test_table;

-- Write rows/Copy a table into the stream `test_table`
INSERT INTO test_table VALUES('Tom', 30);
INSERT INTO test_table SELECT * FROM mytable;
```
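
The same operations can also be issued programmatically. Below is a minimal sketch using the Table API, assuming the `tableEnv` and the `pravega_catalog` registered in the Java example above:

```java
// Point the session at the Pravega catalog and scope.
tableEnv.useCatalog("pravega_catalog");
tableEnv.useDatabase("scope1");

// Query the stream `test_table` as a table and print the result.
Table result = tableEnv.sqlQuery("SELECT name, age FROM test_table");
result.execute().print();

// Append a row to the backing Pravega stream.
tableEnv.executeSql("INSERT INTO test_table VALUES ('Tom', 30)");
```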

## Useful Flink links

See [Flink Table catalogs docs](https://ci.apache.org/projects/flink/flink-docs-stable/dev/table/catalogs.html) for more information on the general Catalog concepts and more detailed operations.
25 changes: 20 additions & 5 deletions documentation/src/docs/configurations.md
@@ -3,27 +3,35 @@ title: Configurations
---

<!--
Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
Copyright Pravega Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

The Flink connector library for Pravega supports the **Flink Streaming API**, **Table API** and **Batch API**, using a common configuration class.

## Table of Contents

- [Common Configuration](#common-configuration)
- [PravegaConfig Class](#pravegaconfig-class)
- [Creating PravegaConfig](#creating-pravegaconfig)
- [Using PravegaConfig](#using-pravegaconfig)
- [Understanding the Default Scope](#understanding-the-default-scope)
- [PravegaConfig Class](#pravegaconfig-class)
- [Creating PravegaConfig](#creating-pravegaconfig)
- [Using PravegaConfig](#using-pravegaconfig)
- [Understanding the Default Scope](#understanding-the-default-scope)

## Common Configuration

### PravegaConfig Class

A top-level config object, `PravegaConfig`, is provided to establish a Pravega context for the Flink connector. The config object automatically configures itself from _environment variables_, _system properties_ and _program arguments_.

The `PravegaConfig` information sources are given below:
@@ -36,18 +44,22 @@ A top-level config object, `PravegaConfig`, is provided to establish a Pravega c
|Hostname Validation|-|`true`|

### Creating PravegaConfig

The recommended way to create an instance of `PravegaConfig` is to pass an instance of `ParameterTool` to `fromParams`:

```java
ParameterTool params = ParameterTool.fromArgs(args);
PravegaConfig config = PravegaConfig.fromParams(params);
```

If your application doesn't use the `ParameterTool` class that is provided by Flink, create the `PravegaConfig` using `fromDefaults`:

```java
PravegaConfig config = PravegaConfig.fromDefaults();
```

The `PravegaConfig` class provides a builder-style API to override the default configuration settings:

```java
PravegaConfig config = PravegaConfig.fromDefaults()
.withControllerURI("tcp://...")
@@ -57,7 +69,9 @@ PravegaConfig config = PravegaConfig.fromDefaults()
```

### Using PravegaConfig

All of the various source and sink classes provided with the connector library have a builder-style API which accepts a `PravegaConfig` for common configuration. Pass a `PravegaConfig` object to the respective builder via `withPravegaConfig`. For example, see the code below:

```java
PravegaConfig config = ...;

@@ -68,6 +82,7 @@ FlinkPravegaReader<MyClass> pravegaSource = FlinkPravegaReader.<MyClass>builder(
```

### Understanding the Default Scope

Pravega organizes streams into _scopes_ for the purposes of manageability. The `PravegaConfig` establishes a default scope name that is used in two scenarios:

1. For resolving unqualified stream names when constructing a source or sink. The sources and sinks accept stream names that may be **qualified** (e.g. `my-scope/my-stream`) or **unqualified** (e.g. `my-stream`).
83 changes: 60 additions & 23 deletions documentation/src/docs/dev-guide.md
@@ -3,67 +3,82 @@ title: Developer's Guide
---

<!--
Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
Copyright Pravega Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

# Flink Connector - Dev Guide

Learn how to build your own applications that use the Flink connector for Pravega.


# Prerequisites

To complete this guide, you need:

* JDK 8 or 11 installed with JAVA_HOME configured appropriately
* Pravega running (check [here](https://pravega.io/docs/latest/getting-started/) to get started with Pravega)
* Gradle or Maven installed



# Goal

In this guide, we will create a straightforward example application that writes data collected from an external network stream into a Pravega Stream and then reads the data back from that Pravega Stream.
We recommend that you follow the instructions from [Bootstrapping project](#Bootstrapping-the-Project) onwards to create the application step by step.
However, you can go straight to the completed example at [flink-connector-examples](https://github.com/pravega/pravega-samples/tree/master/flink-connector-examples).

# Starting Flink

Download a Flink release and un-tar it. We use Flink 1.11.2 here.


# Starting Flink
Download Flink release and un-tar it. We use Flink 1.11.2 here.
```
$ tar -xzf flink-1.11.2-bin-scala_2.11.tgz
$ cd flink-1.11.2-bin-scala_2.11
```bash
$ tar -xzf flink-1.11.2-bin-scala_2.12.tgz
$ cd flink-1.11.2-bin-scala_2.12
```

Start a cluster
```

```bash
$ ./bin/start-cluster.sh
Starting cluster.
Starting standalonesession daemon on host.
Starting taskexecutor daemon on host.
```

When you are finished, you can quickly stop the cluster and all running components:
```

```bash
$ ./bin/stop-cluster.sh
```

# Bootstrapping the Project.
# Bootstrapping the Project

Use Gradle or Maven to bootstrap a sample application against Pravega. Let's create a word count application as an example.
### Gradle

## Gradle

You can follow the instructions [here](https://ci.apache.org/projects/flink/flink-docs-stable/dev/project-configuration.html#gradle) to create a Gradle project.

Add the snippet below to the dependencies section of `build.gradle` in the app directory; connector dependencies should be part of the shadow JAR. For the Flink connector dependency, choose the connector that matches your Flink major version and Scala version (if you use Scala), along with the Pravega version you run.
```

```groovy
compile group: 'org.apache.flink', name: 'flink-streaming-java_2.12', version: '1.11.2'
flinkShadowJar group: 'io.pravega', name: 'pravega-connectors-flink-1.11_2.12', version: '0.9.0'
```

Define the custom configuration `flinkShadowJar`:
```

```groovy
// Explicitly define the libraries we want to be included in the "flinkShadowJar" configuration!
configurations {
flinkShadowJar // dependencies which go into the shadowJar
@@ -78,13 +93,13 @@ configurations {

Invoke `gradle clean shadowJar` to build/package the project. You will find a JAR file that contains your application, plus connectors and libraries that you may have added as dependencies to the application: `build/libs/<project-name>-<version>-all.jar`.


### Maven
## Maven

You can check [maven-quickstart](https://ci.apache.org/projects/flink/flink-docs-release-1.12/dev/project-configuration.html#maven-quickstart) to learn how to start with Maven.

Add the dependencies below to the Maven POM; these dependencies should be part of the shadow JAR:
```

```xml
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.12</artifactId>
@@ -101,27 +116,31 @@ Add below dependencies into Maven POM, these dependencies should be part of the

Invoke `mvn clean package` to build/package your project. You will find a JAR file that contains your application, plus connectors and libraries that you may have added as dependencies to the application: `target/<artifact-id>-<version>.jar`.




## Create an application that writes to Pravega

Let's first create a Pravega configuration by reading from the program arguments:

```java
ParameterTool params = ParameterTool.fromArgs(args);
PravegaConfig pravegaConfig = PravegaConfig
.fromParams(params)
.withDefaultScope("my_scope");
```

Then we need to initialize the Flink execution environment

```java
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
```

Create a datastream that gets input data by connecting to the socket

```java
DataStream<String> dataStream = env.socketTextStream(host, port);
```

A Pravega Stream may be used as a data sink within a Flink program using an instance of `io.pravega.connectors.flink.FlinkPravegaWriter`. We add an instance of the writer to the dataflow program:

```java
FlinkPravegaWriter<String> writer = FlinkPravegaWriter.<String>builder()
.withPravegaConfig(pravegaConfig)
@@ -130,57 +149,75 @@ FlinkPravegaWriter<String> writer = FlinkPravegaWriter.<String>builder()
.build();
dataStream.addSink(writer).name("Pravega Sink");
```

Then we execute the job within the Flink environment

```java
env.execute("PravegaWriter");
```

Executing the above lines creates a PravegaWriter job.

## Create an application that reads from Pravega

Creating a Pravega reader is similar to creating a Pravega writer.
First, create a Pravega configuration by reading from the program arguments:

```java
ParameterTool params = ParameterTool.fromArgs(args);
PravegaConfig pravegaConfig = PravegaConfig
.fromParams(params)
.withDefaultScope("my_scope");
```

Initialize the Flink execution environment

```java
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
```

A Pravega Stream may be used as a data source within a Flink streaming program using an instance of `io.pravega.connectors.flink.FlinkPravegaReader`. The reader reads a given Pravega Stream (or multiple streams) as a DataStream

```java
FlinkPravegaReader<String> source = FlinkPravegaReader.<String>builder()
.withPravegaConfig(pravegaConfig)
.forStream(stream)
.withDeserializationSchema(new SimpleStringSchema())
.build();
```

Then create a data stream that counts each word over a 10-second time period:

```java
DataStream<WordCount> dataStream = env.addSource(source).name("Pravega Stream")
.flatMap(new Tokenizer()) // The Tokenizer() splits the line into words, and emit streams of "WordCount(word, 1)"
.keyBy("word")
.timeWindow(Time.seconds(10))
.sum("count");
```
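
The `Tokenizer` used above is not defined in this guide; a minimal sketch is shown below, under the assumption that `WordCount` is a POJO with public `word` and `count` fields and a `(String, int)` constructor (both are assumptions, not part of the connector API):

```java
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;

// Hypothetical Tokenizer: splits each line into words and emits a WordCount(word, 1) record per word.
public static class Tokenizer implements FlatMapFunction<String, WordCount> {
    @Override
    public void flatMap(String line, Collector<WordCount> out) {
        for (String word : line.toLowerCase().split("\\W+")) {
            if (!word.isEmpty()) {
                out.collect(new WordCount(word, 1));
            }
        }
    }
}
```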

Create an output sink to print to stdout for verification

```java
dataStream.print();
```

Then we execute the job within the Flink environment

```java
env.execute("PravegaReader");
```

## Run in flink environment

First build your application. From Flink's perspective, the connector to Pravega is part of the streaming application (not part of Flink's core runtime), so the connector code must be part of the application's code artifact (JAR file). Typically, a Flink application is bundled as a `fat-jar` (also known as an `uber-jar`), such that all its dependencies are embedded.

Make sure your Pravega and Flink are running. Use the packaged jar, and run:
```

```bash
flink run -c <classname> ${your-app}.jar --controller <pravega-controller-uri>
```

# What’s next?

This guide covered the creation of an application that uses the Flink connector to read and write from a Pravega stream. However, there is much more. We recommend continuing the journey by going through the [flink connector documents](https://pravega.io/docs/latest/connectors/flink-connector/) and checking out other examples at [flink-connector-examples](https://github.com/pravega/pravega-samples/tree/master/flink-connector-examples).
37 changes: 18 additions & 19 deletions documentation/src/docs/getting-started.md
@@ -3,34 +3,35 @@ title: Getting Started
---

<!--
Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
Copyright Pravega Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

import TOCInline from '@theme/TOCInline';

[![Build Status](https://travis-ci.org/pravega/flink-connectors.svg?branch=master)](https://travis-ci.org/pravega/flink-connectors)

This repository implements connectors to read and write [Pravega](http://pravega.io/) Streams with [Apache Flink](http://flink.apache.org/) stream processing framework.
This repository implements connectors to read and write [Pravega](http://pravega.io/) Streams with [Apache Flink](http://flink.apache.org/) stream processing framework.

The connectors can be used to build end-to-end stream processing pipelines (see [Samples](https://github.com/pravega/pravega-samples)) that use Pravega as the stream storage and message bus, and Apache Flink for computation over the streams.

<TOCInline toc={toc} />

## Features & Highlights

- **Exactly-once processing guarantees** for both Reader and Writer, supporting **end-to-end exactly-once processing pipelines**

- Seamless integration with Flink's checkpoints and savepoints.

- Parallel Readers and Writers supporting high throughput and low latency processing.

- Table API support to access Pravega Streams for both **Batch** and **Streaming** use case.
- **Exactly-once processing guarantees** for both Reader and Writer, supporting **end-to-end exactly-once processing pipelines**
- Seamless integration with Flink's checkpoints and savepoints.
- Parallel Readers and Writers supporting high throughput and low latency processing.
- Table API support to access Pravega Streams for both **Batch** and **Streaming** use case.

## Building Connectors

@@ -40,19 +41,19 @@ The connector project is linked to a specific version of Pravega and the version

Check out the source code repository by following the steps below:

```
```bash
git clone https://github.com/pravega/flink-connectors.git
```

After cloning the repository, the project can be built by running the below command in the project root directory `flink-connectors`.

```
```bash
./gradlew clean build
```

To install the artifacts in the local maven repository cache `~/.m2/repository`, run the following command:

```
```bash
./gradlew clean install
```

@@ -78,14 +79,12 @@ In order to build a new version of Flink for a different Scala version, please r

## Setting up your IDE

Connector project uses [Project Lombok](https://projectlombok.org/), so we should ensure that we have our IDE setup with the required plugins. (**IntelliJ is recommended**).
IntelliJ is recommended for the connector project.

To import the source into IntelliJ:

1. Import the project directory into IntelliJ IDE. It will automatically detect the gradle project and import things correctly.
2. Enable `Annotation Processing` by going to `Build, Execution, Deployment` -> `Compiler` > `Annotation Processors` and checking `Enable annotation processing`.
3. Install the `Lombok Plugin`. This can be found in `Preferences` -> `Plugins`. Restart your IDE.
4. Connectors project compiles properly after applying the above steps.
2. The connector project should compile properly after applying the above step.

For Eclipse, we can generate Eclipse project files by running `./gradlew eclipse`.

@@ -100,4 +99,4 @@ Open an issue if you found a bug on [Github Issues](https://github.com/pravega/f

## Samples

Follow the [Pravega Samples](https://github.com/pravega/pravega-samples) repository to learn more about how to build and use the Flink Connector library.
Follow the [Pravega Samples](https://github.com/pravega/pravega-samples) repository to learn more about how to build and use the Flink Connector library.
9 changes: 7 additions & 2 deletions documentation/src/docs/metrics.md
@@ -3,18 +3,23 @@ title: Metrics
---

<!--
Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
Copyright Pravega Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

Pravega metrics are collected and exposed via the Flink metrics framework when using [`FlinkPravegaReader`](streaming.md#flinkpravegareader) or [`FlinkPravegaWriter`](streaming.md#flinkpravegawriter).
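
Reporting is on by default; a hedged sketch of disabling Pravega metrics on the reader builder is shown below (the stream name, `pravegaConfig` object, and schema are placeholders):

```java
FlinkPravegaReader<String> pravegaSource = FlinkPravegaReader.<String>builder()
        .withPravegaConfig(pravegaConfig)
        .forStream("my-scope/my-stream")
        .withDeserializationSchema(new SimpleStringSchema())
        .enableMetrics(false) // turn off Pravega metric reporting for this source
        .build();
```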


## Reader Metrics

The following metrics are exposed for `FlinkPravegaReader` related operations:
67 changes: 38 additions & 29 deletions documentation/src/docs/overview.md
@@ -1,29 +1,38 @@
---
title: Overview
---

<!--
Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
-->

This documentation describes the connectors API and its usage to read and write [Pravega](http://pravega.io/) streams with the [Apache Flink](http://flink.apache.org/) stream processing framework.

Build end-to-end stream processing pipelines that use Pravega as the stream storage and message bus, and Apache Flink for computation over the streams. See the [Pravega Concepts](http://pravega.io/docs/pravega-concepts/) page for more information.

## Table of Contents

- [Getting Started](getting-started.md)
- [Quick Start](quickstart.md)
- Features
- [Streaming](streaming.md)
- [Batch](batch.md)
- [Table API/SQL](table-api.md)
- [Metrics](metrics.md)
- [Configurations](configurations.md)
- [Serialization](serialization.md)
---
title: Overview
---

<!--
Copyright Pravega Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

This documentation describes the connectors API and its usage to read and write [Pravega](http://pravega.io/) streams with the [Apache Flink](http://flink.apache.org/) stream processing framework.

Build end-to-end stream processing pipelines that use Pravega as the stream storage and message bus, and Apache Flink for computation over the streams. See the [Pravega Concepts](http://pravega.io/docs/pravega-concepts/) page for more information.

## Table of Contents

- [Getting Started](getting-started.md)
- [Quick Start](quickstart.md)
- [Dev Guide](dev-guide.md)
- Features
- [Streaming](streaming.md)
- [Batch](batch.md)
- [Table API/SQL](table-api.md)
- [Catalog](catalog.md)
- [Python Datastream API](python.md)
- [Metrics](metrics.md)
- [Configurations](configurations.md)
- [Serialization](serialization.md)
125 changes: 125 additions & 0 deletions documentation/src/docs/python.md
@@ -0,0 +1,125 @@
# Pravega Python DataStream connector

This Pravega Python DataStream connector provides a data source and data sink for Flink streaming jobs.

Your Flink streaming jobs could use Pravega as their storage with these [Python API Wrappers](https://github.com/pravega/flink-connectors/tree/master/src/main/python). This page only describes the API usage; for parameter concepts, please refer to [Configurations](configurations.md) and [Streaming](streaming.md).

**DISCLAIMER: This python wrapper is an IMPLEMENTATION REFERENCE and is not officially published.**

* [How to use](#How-to-use)
* [PravegaConfig](#PravegaConfig)
* [StreamCut](#StreamCut)
* [FlinkPravegaReader](#FlinkPravegaReader)
* [FlinkPravegaWriter](#FlinkPravegaWriter)
* [Metrics](#Metrics)
* [Serialization](#Serialization)

## How to use

Together with the connector JAR and Python wrapper files, you can submit your job with the main compute code like this:

```bash
flink run --python ./application.py --pyFiles <connector-repo>/src/main/python/ --jarfile /path/to/pravega-connectors-flink.jar
```

## PravegaConfig

A top-level config object, `PravegaConfig`, is provided to establish a Pravega context for the Flink connector.

```python
from pravega_config import PravegaConfig

pravega_config = PravegaConfig(uri=uri, scope=scope)
```

|parameter|type|required|default value|description|
|-|-|-|-|-|
|uri|str|Yes|N/A|The Pravega controller RPC URI.|
|scope|str|Yes|N/A|The self-defined Pravega scope.|
|trust_store|str|No|None|The truststore value.|
|default_scope|str|No|None|The default Pravega scope, to resolve unqualified stream names and to support reader groups.|
|credentials|DefaultCredentials|No|None|The Pravega credentials to use.|
|validate_hostname|bool|No|True|TLS hostname validation.|

## StreamCut

A `StreamCut` object can be constructed via the `from_base64` class method, where a base64 string is passed as the only parameter.

By default, the `FlinkPravegaReader` will pass the `UNBOUNDED` `StreamCut`, which lets the reader read from the HEAD to the TAIL of the stream.

## FlinkPravegaReader

Use `FlinkPravegaReader` as a datastream source. It can be added via `env.add_source`.

```python
from pyflink.common.serialization import SimpleStringSchema
from pyflink.datastream import StreamExecutionEnvironment

from pravega_config import PravegaConfig
from pravega_reader import FlinkPravegaReader

env = StreamExecutionEnvironment.get_execution_environment()

pravega_config = PravegaConfig(uri=uri, scope=scope)
pravega_reader = FlinkPravegaReader(
stream=stream,
pravega_config=pravega_config,
deserialization_schema=SimpleStringSchema())

ds = env.add_source(pravega_reader)
```

|parameter|type|required|default value|description|
|-|-|-|-|-|
|stream|Union[str, Stream]|Yes|N/A|The stream to be read from.|
|pravega_config|PravegaConfig|Yes|N/A|Set the Pravega client configuration, which includes connection info, security info, and a default scope.|
|deserialization_schema|DeserializationSchema|Yes|N/A|The deserialization schema which describes how to turn byte messages into events.|
|start_stream_cut|StreamCut|No|StreamCut.UNBOUNDED|Read from the given start position in the stream.|
|end_stream_cut|StreamCut|No|StreamCut.UNBOUNDED|Read to the given end position in the stream.|
|enable_metrics|bool|No|True|Pravega reader metrics.|
|uid|str|No|None(random generated uid on java side)|The uid to identify the checkpoint state of this source.|
|reader_group_scope|str|No|pravega_config.default_scope|The scope to store the Reader Group synchronization stream into.|
|reader_group_name|str|No|None(auto-generated name on java side)|The Reader Group name for display purposes.|
|reader_group_refresh_time|timedelta|No|None(3 seconds on java side)|The interval for synchronizing the Reader Group state across parallel source instances.|
|checkpoint_initiate_timeout|timedelta|No|None(5 seconds on java side)|The timeout for executing a checkpoint of the Reader Group state.|
|event_read_timeout|timedelta|No|None(1 second on java side)|Sets the timeout for the call to read events from Pravega. After the timeout expires (without an event being returned), another call will be made.|
|max_outstanding_checkpoint_request|int|No|None(3 on java side)|Configures the maximum outstanding checkpoint requests to Pravega.|

## FlinkPravegaWriter

Use `FlinkPravegaWriter` as a datastream sink. It can be added via `add_sink` on a `DataStream`.

```python
from pyflink.common.serialization import SimpleStringSchema
from pyflink.datastream import StreamExecutionEnvironment

from pravega_config import PravegaConfig
from pravega_writer import FlinkPravegaWriter

env = StreamExecutionEnvironment.get_execution_environment()

pravega_config = PravegaConfig(uri=uri, scope=scope)
pravega_writer = FlinkPravegaWriter(stream=stream,
pravega_config=pravega_config,
serialization_schema=SimpleStringSchema())

# `ds` is an existing DataStream to be written out; sinks are attached to a DataStream, not the environment
ds.add_sink(pravega_writer)
```

|parameter|type|required|default value|description|
|-|-|-|-|-|
|stream|Union[str, Stream]|Yes|N/A|The stream to be written to.|
|pravega_config|PravegaConfig|Yes|N/A|Set the Pravega client configuration, which includes connection info, security info, and a default scope.|
|serialization_schema|SerializationSchema|Yes|N/A|The serialization schema which describes how to turn events into byte messages.|
|enable_metrics|bool|No|True|Pravega writer metrics.|
|writer_mode|PravegaWriterMode|No|PravegaWriterMode.ATLEAST_ONCE|The writer mode to provide *Best-effort*, *At-least-once*, or *Exactly-once* guarantees.|
|enable_watermark|bool|No|False|Emit Flink watermark in event-time semantics to Pravega streams.|
|txn_lease_renewal_period|timedelta|No|None(30 seconds on java side)|The transaction lease renewal period, used by the *Exactly-once* writer mode.|

## Metrics

Metrics are reported by default unless explicitly disabled with the `enable_metrics=False` option. See the [Metrics](metrics.md) page for more details on the types of metrics that are reported.

## Serialization

See the [Data Types](https://ci.apache.org/projects/flink/flink-docs-stable/docs/dev/python/datastream/data_types/) page of PyFlink for more information.
27 changes: 15 additions & 12 deletions documentation/src/docs/quickstart.md
@@ -3,13 +3,19 @@ title: Quick Start
---

<!--
Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
Copyright Pravega Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

## Creating a Flink Stream Processing Project
@@ -18,8 +24,8 @@ You may obtain a copy of the License at

Please use the following project templates and setup guidelines to set up a stream processing project with Apache Flink using the connectors:

- [Project template for Java](https://ci.apache.org/projects/flink/flink-docs-stable/quickstart/java_api_quickstart.html)
- [Project template for Scala](https://ci.apache.org/projects/flink/flink-docs-stable/quickstart/scala_api_quickstart.html)
- [Project template for Java](https://ci.apache.org/projects/flink/flink-docs-stable/quickstart/java_api_quickstart.html)
- [Project template for Scala](https://ci.apache.org/projects/flink/flink-docs-stable/quickstart/scala_api_quickstart.html)

Once the setup is complete, please follow the instructions below to add the **Flink Pravega connectors** to the project.

@@ -44,22 +50,19 @@ To add the Pravega connector dependencies to your project, add the following ent
```

Use the appropriate versions as necessary. `1.9` is the Flink major-minor version. `2.12` is the Scala version. `0.6.0` is the Pravega version.
The snapshot versions are published to [`jcenter`](https://oss.jfrog.org/artifactory/jfrog-dependencies/io/pravega/) repository and the release artifacts are available in [`Maven Central`](https://mvnrepository.com/artifact/io.pravega/pravega-connectors-flink) repository.
The snapshot versions are published to [`GitHub Packages`](https://github.com/orgs/pravega/packages) repository and the release artifacts are available in [`Maven Central`](https://mvnrepository.com/artifact/io.pravega/pravega-connectors-flink) repository.

Alternatively, we could build and publish the connector artifacts to the local Maven repository by executing the following command and use that version as your application dependency.

```
```bash
./gradlew clean install
```

## Running / Deploying the Application

From Flink's perspective, the connector to Pravega is part of the streaming application (not part of Flink's core runtime), so the connector code must be part of the application's code artifact (JAR file). Typically, a Flink application is bundled as a _`fat-jar`_ (also known as an _`uber-jar`_), such that all its dependencies are embedded.

- The project set up should have been a success, if you have used the above linked [templates/guides](#creating-a-flink-stream-processing-project).

- If you set up a application's project and dependencies manually, you need to make sure that it builds a _jar with dependencies_, to include both the application and the connector classes.

- The Flink connector has embedded and shaded the `pravega-client` dependency of the same version. Please **DO NOT** include both `pravega-client` and `pravega-flink-connector` dependency into the jar file, otherwise there will be dependency conflict issues.

- If user application uses Table API and SQL, please **DO NOT** compile `flink-table-planner` or `flink-table-planner-blink` into the jar file as they are provided by the Flink cluster.
- The project setup should have been a success if you used the [templates/guides](#creating-a-flink-stream-processing-project) linked above.
- If you set up the application's project and dependencies manually, you need to make sure that it builds a _jar with dependencies_, to include both the application and the connector classes.
- The Flink connector has embedded and shaded the `pravega-client` dependency of the same version. Please **DO NOT** include both the `pravega-client` and `pravega-flink-connector` dependencies in the jar file, otherwise there will be dependency conflict issues.
- If the user application uses the Table API and SQL, please **DO NOT** compile `flink-table-planner` or `flink-table-planner-blink` into the jar file as they are provided by the Flink cluster.
23 changes: 19 additions & 4 deletions documentation/src/docs/serialization.md
@@ -3,38 +3,50 @@ title: Serialization
---

<!--
Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
Copyright Pravega Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

**Serialization** refers to converting a data element in your Flink program to/from a message in a Pravega stream.

Flink defines a standard interface for data serialization to/from byte messages delivered by various connectors. The core interfaces are:

- [`org.apache.flink.streaming.util.serialization.SerializationSchema`]( https://ci.apache.org/projects/flink/flink-docs-stable/api/java/org/apache/flink/streaming/util/serialization/SerializationSchema.html)
- [`org.apache.flink.streaming.util.serialization.DeserializationSchema`]( https://ci.apache.org/projects/flink/flink-docs-stable/api/java/org/apache/flink/streaming/util/serialization/DeserializationSchema.html)

Built-in serializers include:

- [`org.apache.flink.streaming.util.serialization.SimpleStringSchema`](https://ci.apache.org/projects/flink/flink-docs-stable/api/java/org/apache/flink/streaming/util/serialization/SimpleStringSchema.html)
- [`org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema`](https://ci.apache.org/projects/flink/flink-docs-stable/api/java/org/apache/flink/streaming/util/serialization/TypeInformationSerializationSchema.html)

The Pravega connector is designed to use Flink's serialization interfaces. For example, to read each stream event as a UTF-8 string:

```java
DeserializationSchema<String> schema = new SimpleStringSchema();
FlinkPravegaReader<String> reader = new FlinkPravegaReader<>(..., schema);
DataStream<String> stream = env.addSource(reader);
```

## Interoperability with Other Applications

A common scenario is using Flink to process Pravega stream data produced by a non-Flink application. The Pravega client library used by such applications defines the [`io.pravega.client.stream.Serializer`](http://pravega.io/docs/latest/javadoc/clients/io/pravega/client/stream/Serializer.html) interface for working with event data. Implementations of `Serializer` can be used directly in a Flink program via built-in adapters:

- [`io.pravega.connectors.flink.serialization.PravegaSerializationSchema`](https://github.com/pravega/flink-connectors/blob/master/src/main/java/io/pravega/connectors/flink/serialization/PravegaSerializationSchema.java)
- [`io.pravega.connectors.flink.serialization.PravegaDeserializationSchema`](https://github.com/pravega/flink-connectors/blob/master/src/main/java/io/pravega/connectors/flink/serialization/PravegaDeserializationSchema.java)

Below is an example that passes an instance of the appropriate Pravega de/serializer class to the adapter's constructor:

```java
import io.pravega.client.stream.impl.JavaSerializer;
...
@@ -47,10 +59,12 @@ DataStream<MyEvent> stream = env.addSource(reader);
Note that the Pravega serializer must implement `java.io.Serializable` to be usable in a Flink program.

## Deserialize with metadata
Pravega reader client wraps the event with the metadata in an `EventRead` data structure. Some Flink jobs might
care about the stream position of the event data which is in `EventRead`, e.g. for indexing purposes.

The Pravega reader client wraps the event with its metadata in an `EventRead` data structure. Some Flink jobs might
care about the stream position of the event data, which is in `EventRead`, e.g. for indexing purposes.

`PravegaDeserializationSchema` offers a method to extract the event along with the metadata:

```java
public T extractEvent(EventRead<T> eventRead) {
return eventRead.getEvent();
@@ -59,6 +73,7 @@ public T extractEvent(EventRead<T> eventRead) {

The default implementation can be overridden to include metadata such as the `EventPointer` in the event
by extending `PravegaDeserializationSchema`. For example:

```java
private static class MyJsonDeserializationSchema extends PravegaDeserializationSchema<JsonNode> {
private boolean includeMetadata;
@@ -77,4 +92,4 @@ private static class MyJsonDeserializationSchema extends PravegaDeserializationS
return node;
}
}
```
```
67 changes: 43 additions & 24 deletions documentation/src/docs/streaming.md
@@ -3,34 +3,41 @@ title: Streaming
---

<!--
Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
Copyright Pravega Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

The Flink Connector library for Pravega provides a data source and data sink
for use with the Flink Streaming API. See the sections below for details.

## Table of Contents

- [FlinkPravegaReader](#flinkpravegareader)
- [Parameters](#parameters)
- [Input Stream(s)](#input-streams)
- [Reader Parallelism](#reader-parallelism)
- [Checkpointing](#checkpointing)
- [Timestamp Extraction (Watermark Emission)](#timestamp-extraction-watermark-emission)
- [Stream Cuts](#streamcuts)
- [Historical Stream Processing](#historical-stream-processing)
- [Parameters](#parameters)
- [Input Stream(s)](#input-streams)
- [Reader Parallelism](#reader-parallelism)
- [Checkpointing](#checkpointing)
- [Timestamp Extraction (Watermark Emission)](#timestamp-extraction-watermark-emission)
- [Stream Cuts](#streamcuts)
- [Historical Stream Processing](#historical-stream-processing)
- [FlinkPravegaWriter](#flinkpravegawriter)
- [Parameters](#parameters-1)
- [Writer Parallelism](#writer-parallelism)
- [Event Routing](#event-routing)
- [Event Time Ordering](#event-time-ordering)
- [Watermark](#watermark)
- [Writer Modes](#writer-modes)
- [Parameters](#parameters-1)
- [Writer Parallelism](#writer-parallelism)
- [Event Routing](#event-routing)
- [Event Time Ordering](#event-time-ordering)
- [Watermark](#watermark)
- [Writer Modes](#writer-modes)
- [Metrics](#metrics)
- [Data Serialization](#serialization)

@@ -41,6 +48,7 @@ A Pravega Stream may be used as a data source within a Flink streaming program u
Open a Pravega Stream as a DataStream using the method [`StreamExecutionEnvironment::addSource`](https://ci.apache.org/projects/flink/flink-docs-stable/api/java/org/apache/flink/streaming/api/environment/StreamExecutionEnvironment.html#addSource-org.apache.flink.streaming.api.functions.source.SourceFunction-).

#### Example

```java
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

@@ -58,6 +66,7 @@ FlinkPravegaReader<MyClass> pravegaSource = FlinkPravegaReader.<MyClass>builder(
.build();
DataStream<MyClass> stream = env.addSource(pravegaSource);
```

### Parameters

A builder API is provided to construct an instance of `FlinkPravegaReader`. See the table below for a summary of builder properties. Note that, the builder accepts an instance of `PravegaConfig` for common configuration properties. See the [configurations](configurations.md) page for more information.
@@ -76,16 +85,17 @@ A builder API is provided to construct an instance of `FlinkPravegaReader`. See
|`enableMetrics`|true or false to enable/disable reporting Pravega metrics. Metrics is enabled by default.|

### Input Stream(s)

Each stream in Pravega is contained by a scope. A scope acts as a namespace for one or more streams. The `FlinkPravegaReader` is able to read from numerous streams in parallel, even across scopes. The builder API accepts both **qualified** and **unqualified** stream names.

- In qualified, the scope is explicitly specified, e.g. `my-scope/my-stream`.
- In Unqualified stream names are assumed to refer to the default scope as set in the `PravegaConfig`.
- In qualified names, the scope is explicitly specified, e.g. `my-scope/my-stream`.
- Unqualified stream names are assumed to refer to the default scope as set in the `PravegaConfig`.

A stream may be specified in one of three ways:

1. As a string containing a qualified name, in the form `scope/stream`.
2. As a string containing an unqualified name, in the form `stream`. Such streams are resolved to the default scope.
3. As an instance of `io.pravega.client.stream.Stream`, e.g. `Stream.of("my-scope", "my-stream")`.
1. As a string containing a qualified name, in the form `scope/stream`.
2. As a string containing an unqualified name, in the form `stream`. Such streams are resolved to the default scope.
3. As an instance of `io.pravega.client.stream.Stream`, e.g. `Stream.of("my-scope", "my-stream")`.
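
As an illustration of the three forms, a reader configured with multiple streams might look like the following sketch (the stream and scope names are placeholders):

```java
FlinkPravegaReader<MyClass> pravegaSource = FlinkPravegaReader.<MyClass>builder()
        .withPravegaConfig(pravegaConfig)
        .forStream("my-scope/stream-a")                   // 1. qualified name
        .forStream("stream-b")                            // 2. unqualified name, resolved against the default scope
        .forStream(Stream.of("other-scope", "stream-c"))  // 3. io.pravega.client.stream.Stream instance
        .withDeserializationSchema(deserializationSchema)
        .build();
```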

### Reader Parallelism

@@ -101,8 +111,8 @@ A **savepoint** is self-contained; it contains all information needed to resume

The checkpoint mechanism works as a two-step process:

- The [master hook](https://ci.apache.org/projects/flink/flink-docs-stable/api/java/org/apache/flink/runtime/checkpoint/MasterTriggerRestoreHook.html) handler from the job manager initiates the [`triggerCheckpoint`](https://ci.apache.org/projects/flink/flink-docs-stable/api/java/org/apache/flink/runtime/checkpoint/MasterTriggerRestoreHook.html#triggerCheckpoint-long-long-java.util.concurrent.Executor-) request to the `ReaderCheckpointHook` that was registered with the Job Manager during `FlinkPravegaReader` source initialization. The `ReaderCheckpointHook` handler notifies Pravega to checkpoint the current reader state. This is a non-blocking call which returns a `future` once Pravega readers are done with the checkpointing.
- A `CheckPoint` event will be sent by Pravega as part of the data stream flow and on receiving the event, the `FlinkPravegaReader` will initiate [`triggerCheckpoint`](https://github.com/apache/flink/blob/master/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/checkpoint/ExternallyInducedSource.java#L73) request to effectively let Flink continue and complete the checkpoint process.
- The [master hook](https://ci.apache.org/projects/flink/flink-docs-stable/api/java/org/apache/flink/runtime/checkpoint/MasterTriggerRestoreHook.html) handler from the job manager initiates the [`triggerCheckpoint`](https://ci.apache.org/projects/flink/flink-docs-stable/api/java/org/apache/flink/runtime/checkpoint/MasterTriggerRestoreHook.html#triggerCheckpoint-long-long-java.util.concurrent.Executor-) request to the `ReaderCheckpointHook` that was registered with the Job Manager during `FlinkPravegaReader` source initialization. The `ReaderCheckpointHook` handler notifies Pravega to checkpoint the current reader state. This is a non-blocking call which returns a `future` once Pravega readers are done with the checkpointing.
- A `CheckPoint` event will be sent by Pravega as part of the data stream flow and on receiving the event, the `FlinkPravegaReader` will initiate [`triggerCheckpoint`](https://github.com/apache/flink/blob/master/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/checkpoint/ExternallyInducedSource.java#L73) request to effectively let Flink continue and complete the checkpoint process.
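
On the Flink side, nothing Pravega-specific is required to participate in this cycle; checkpointing is enabled as usual (the 10-second interval below is illustrative):

```java
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

// Trigger a checkpoint every 10 seconds; the Pravega reader joins each checkpoint via the hook described above.
env.enableCheckpointing(10_000L);
```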

### Timestamp Extraction (Watermark Emission)

@@ -125,8 +135,8 @@ To use normal watermark, you can follow [Flink documentation](https://ci.apache.

Each parallel instance of the source processes one or more stream segments in parallel. Each watermark generator instance will receive events multiplexed from numerous segments. Be aware that segments are processed in parallel, and that no effort is made to order the events across segments in terms of their event time. Also, a given segment may be reassigned to another parallel instance at any time, preserving exactly-once behavior but causing further spread in observed event times.
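
For reference, a minimal sketch of attaching a watermark strategy to a Pravega source is shown below; the five-second out-of-orderness bound and the `getTimestamp()` accessor on `MyEvent` are assumptions for illustration:

```java
import java.time.Duration;

import org.apache.flink.api.common.eventtime.WatermarkStrategy;

DataStream<MyEvent> stream = env
        .addSource(pravegaSource)
        .assignTimestampsAndWatermarks(
                WatermarkStrategy
                        .<MyEvent>forBoundedOutOfOrderness(Duration.ofSeconds(5))
                        .withTimestampAssigner((event, recordTimestamp) -> event.getTimestamp()));
```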


### StreamCuts

A `StreamCut` represents a specific position in a Pravega Stream, which may be obtained from various API interactions with the Pravega client. The `FlinkPravegaReader` accepts a `StreamCut` as the start and/or end position of a given stream. For further reading on
StreamCuts, please refer to documentation on [StreamCut](http://pravega.io/docs/latest/streamcuts/) and [sample code](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples/src/main/java/io/pravega/example/streamcuts).

@@ -145,7 +155,7 @@ FlinkPravegaReader<MyClass> pravegaSource = FlinkPravegaReader.<MyClass>builder(
.build();
DataStream<MyClass> stream = env.addSource(pravegaSource);

```
```

#### Historical Stream Processing

@@ -154,9 +164,11 @@ Historical processing refers to processing stream data from a specific position
One such example is re-processing a stream, where we may have to process the data from the beginning (or from a certain point in the stream) to re-derive the output. For instance, the computation logic may have changed to address new criteria, a bug may have been fixed, or we may be running a typical A/B test; in all such situations, the ability to consume historical data as a stream is critical.

## FlinkPravegaWriter

A Pravega Stream may be used as a data sink within a Flink program using an instance of `io.pravega.connectors.flink.FlinkPravegaWriter`. Add an instance of the writer to the dataflow program using the method [`DataStream::addSink`](https://ci.apache.org/projects/flink/flink-docs-stable/api/java/org/apache/flink/streaming/api/datastream/DataStream.html#addSink-org.apache.flink.streaming.api.functions.sink.SinkFunction-).

### Example

```java
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

@@ -181,6 +193,7 @@ FlinkPravegaWriter<MyClass> pravegaSink = FlinkPravegaWriter.<MyClass>builder()
DataStream<MyClass> stream = ...
stream.addSink(pravegaSink);
```

### Parameters

A builder API is provided to construct an instance of `FlinkPravegaWriter`. See the table below for a summary of builder properties. Note that the builder accepts an instance of `PravegaConfig` for common configuration properties. See the [configurations](configurations.md) page for more information.
@@ -197,14 +210,17 @@ A builder API is provided to construct an instance of `FlinkPravegaWriter`. See
|`enableMetrics`|true or false to enable/disable reporting Pravega metrics. Metrics is enabled by default.|

### Writer Parallelism

`FlinkPravegaWriter` supports parallelization. Use the `setParallelism` method to configure the number of parallel instances to execute.

### Event Routing

Every event written to a Pravega Stream has an associated Routing Key. The Routing Key is the basis for event ordering. See the [Pravega Concepts](http://pravega.io/docs/latest/pravega-concepts/#events) for details.

When constructing the `FlinkPravegaWriter`, please provide an implementation of `io.pravega.connectors.flink.PravegaEventRouter` which will guarantee the event ordering. In Pravega, events are guaranteed to be ordered at the segment level.

For example, to guarantee write order per sensor id, you could provide a router implementation like the one below.

```java
private static class SensorEventRouter<SensorEvent> implements PravegaEventRouter<SensorEvent> {
@Override
@@ -222,12 +238,14 @@ For programs that use Flink's event time semantics, the connector library suppor
Use the method `FlinkPravegaUtils::writeToPravegaInEventTimeOrder` to write a given `DataStream` to a Pravega Stream such that events are automatically ordered by event time (on a per-key basis). Refer [here](https://github.com/pravega/flink-connectors/blob/7971206038b51b3cf0e317e194c552c4646e5c20/src/test/java/io/pravega/connectors/flink/FlinkPravegaWriterITCase.java#L93) for sample code.

### Watermark

Flink applications in event time semantics are carrying watermarks within each operator.

Both Pravega transactional and non-transactional writers provide [watermark API](https://github.com/pravega/pravega/wiki/PDP-33:-Watermarking#event-writer-api-changes) to indicate the event-time watermark for a stream.
With `enableWatermark(true)`, each watermark in Flink will be emitted into a Pravega stream.
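
A hedged sketch of a writer with watermark emission enabled is shown below (the stream name, `pravegaConfig`, serialization schema, and event router are placeholders):

```java
FlinkPravegaWriter<MyEvent> pravegaSink = FlinkPravegaWriter.<MyEvent>builder()
        .withPravegaConfig(pravegaConfig)
        .forStream("my-scope/my-stream")
        .withSerializationSchema(serializationSchema)
        .withEventRouter(eventRouter)
        .enableWatermark(true) // propagate Flink event-time watermarks into the Pravega stream
        .build();
```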

### Writer Modes

Writer modes relate to guarantees about the persistence of events emitted by the sink to a Pravega Stream. The writer supports three writer modes:

1. **Best-effort** - Any write failures will be ignored; hence there could be data loss.
@@ -239,10 +257,11 @@ By default, the _At-least-once_ option is enabled and use `.withWriterMode(...)`

See the [Pravega documentation](http://pravega.io/docs/latest/pravega-concepts/#transactions) for details on transactional behavior.
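
As a sketch, the writer mode can be overridden on the builder as shown below (the other builder calls are placeholders; exactly-once writes rely on Pravega transactions as described above):

```java
FlinkPravegaWriter<MyEvent> exactlyOnceSink = FlinkPravegaWriter.<MyEvent>builder()
        .withPravegaConfig(pravegaConfig)
        .forStream("my-scope/my-stream")
        .withSerializationSchema(serializationSchema)
        .withWriterMode(PravegaWriterMode.EXACTLY_ONCE) // overrides the default at-least-once guarantee
        .build();
```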


# Metrics

Metrics are reported by default unless explicitly disabled using the `enableMetrics(false)` option.
See the [Metrics](metrics.md) page for more details on the types of metrics that are reported.

# Serialization

See the [serialization](serialization.md) page for more information on how to use the _serializer_ and _deserializer_.
138 changes: 88 additions & 50 deletions documentation/src/docs/table-api.md

Large diffs are not rendered by default.

13 changes: 11 additions & 2 deletions documentation/src/mkdocs.yml
@@ -1,10 +1,16 @@
# Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
# Copyright Pravega Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

site_name: 'Exploring Pravega Flink Connectors'
site_url: 'http://pravega.io/'
@@ -30,10 +36,13 @@ nav:
- Overview: 'index.md'
- Getting Started: 'getting-started.md'
- Quick Start: 'quickstart.md'
- Dev Guide: 'dev-guide.md'
- Features:
- 'Streaming': 'streaming.md'
- 'Batch': 'batch.md'
- 'Table API': 'table-api.md'
- 'Catalog': 'catalog.md'
- 'Python Datastream API': 'python.md'
- Metrics: 'metrics.md'
- Configurations: 'configurations.md'
- Serialization: 'serialization.md'
24 changes: 15 additions & 9 deletions gradle.properties
@@ -1,37 +1,43 @@
#
# Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
# Copyright Pravega Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# 3rd party Versions.
checkstyleToolVersion=7.1
flinkVersion=1.12.2
flinkVersion=1.14.0
flinkScalaVersion=2.12
jacksonVersion=2.8.9
lombokVersion=1.18.4
twitterMvnRepoVersion=4.3.4-TWTTR
shadowGradlePlugin=2.0.3
shadowGradlePlugin=7.0.0
slf4jApiVersion=1.7.25
junitVersion=4.12
mockitoVersion=1.10.19
gradleGitPluginVersion=1.7.2
gradleGitPluginVersion=4.1.0
spotbugsVersion=4.0.6
spotbugsAnnotationsVersion=4.0.6
spotbugsPluginVersion=4.4.4
jcipAnnotationsVersion=1.0
gradleSshPluginVersion=2.9.0
gradleMkdocsPluginVersion=1.1.0
gradleMkdocsPluginVersion=2.1.1
jacocoVersion=0.8.2
hamcrestVersion=2.2

# Version and base tags can be overridden at build time.
connectorVersion=0.10.0-SNAPSHOT
pravegaVersion=0.10.0-2815.7461614-SNAPSHOT
schemaRegistryVersion=0.2.0-50.7d32981-SNAPSHOT
connectorVersion=0.11.0-SNAPSHOT
pravegaVersion=0.11.0-3001.0a80ee3-SNAPSHOT
schemaRegistryVersion=0.4.0-78.f1b3eef-SNAPSHOT
apacheCommonsVersion=3.7

# These properties are only needed for publishing to maven central
93 changes: 0 additions & 93 deletions gradle/bintray.gradle

This file was deleted.

9 changes: 7 additions & 2 deletions gradle/checkstyle.gradle
@@ -1,19 +1,24 @@
/**
* Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

apply plugin: 'checkstyle'

checkstyle {
toolVersion = checkstyleToolVersion

configDir = new File(rootDir, "checkstyle")
configDirectory = new File(rootDir, "checkstyle")
configProperties = [importControlFile: "$rootDir/checkstyle/import-control.xml"]
}
25 changes: 0 additions & 25 deletions gradle/idea.gradle

This file was deleted.

7 changes: 6 additions & 1 deletion gradle/jacoco.gradle
@@ -1,12 +1,17 @@
/**
* Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

apply plugin: 'jacoco'
84 changes: 61 additions & 23 deletions gradle/java.gradle
@@ -1,16 +1,34 @@
/**
* Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

apply plugin: 'java'
apply plugin: 'distribution'
buildscript {
repositories {
mavenCentral()
maven {
url "https://plugins.gradle.org/m2/"
}
}
dependencies {
classpath "gradle.plugin.com.github.jengelman.gradle.plugins:shadow:${shadowGradlePlugin}"
}
}

import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar

apply plugin: 'java-library'
apply plugin: 'com.github.johnrengelman.shadow'

compileJava {
@@ -33,34 +51,54 @@ def getFlinkMajorMinorVersion() {

archivesBaseName = "pravega-connectors-flink-" + getFlinkMajorMinorVersion() + '_' + flinkScalaVersion

assemble.dependsOn(shadowJar)
shadowJar {
// relocate pravega client's dependencies to minimize conflicts
relocate "org.apache.commons", "io.pravega.shaded.org.apache.commons"
relocate "com.google", "io.pravega.shaded.com.google"
relocate "io.grpc", "io.pravega.shaded.io.grpc"
relocate "com.squareup.okhttp", "io.pravega.shaded.com.squareup.okhttp"
relocate "okio", "io.pravega.shaded.okio"
relocate "io.opencensus", "io.pravega.shaded.io.opencensus"
relocate "io.netty", "io.pravega.shaded.io.netty"
relocate 'META-INF/native/libnetty', 'META-INF/native/libio_pravega_shaded_netty'
relocate 'META-INF/native/netty', 'META-INF/native/io_pravega_shaded_netty'

// the default classifier will be `all`, which need to be removed for compatibility
archiveClassifier.set(null)

// update package name in META-INF/services to correspond to the shaded path
mergeServiceFiles()
}

task sourceJar(type: Jar) {
classifier = 'sources'
from sourceSets.main.java
jar {
archiveClassifier.set('original')
from (['LICENSE', 'NOTICE']) {
into 'META-INF'
}
}
artifacts { shadow sourceJar }

task javadocJar(type: Jar) {
classifier = 'javadoc'
from javadoc
java {
// add javadocElements and sourcesElements variants to components
withJavadocJar()
withSourcesJar()
}
artifacts { shadow javadocJar }

task testJar(type: Jar) {
classifier = 'tests'
from sourceSets.test.output
task testJar(type: ShadowJar) {
from sourceSets.main.output, sourceSets.test.output

archiveClassifier.set("tests")

// shadow test runtime dependencies
configurations = [project.configurations.testRuntimeClasspath]

// update package name in META-INF/services to correspond to the shaded path
mergeServiceFiles()

// archive contains more than 65535 entries, so enable zip64 support
zip64 = true
}
artifacts { testRuntime testJar }

tasks.withType(Test) {
systemProperties 'logback.configurationFile' : new File(buildDir,'resources/test/logback.xml').absolutePath
testLogging.showStandardStreams = false
testLogging.exceptionFormat = "FULL"
testLogging.showCauses = true
testLogging.showExceptions = true
testLogging.showStackTraces = true
testLogging.events = ["PASSED", "FAILED"]
maxParallelForks = System.properties['maxParallelForks'] ? System.properties['maxParallelForks'].toInteger() : 1
minHeapSize = "128m"
maxHeapSize = "1024m"
168 changes: 77 additions & 91 deletions gradle/maven.gradle
@@ -1,128 +1,114 @@
/**
* Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

apply plugin: 'maven'
apply plugin: 'maven-publish'
apply plugin: 'signing'

if (project.hasProperty("doSigning")) {
signing {
sign shadowJar
sign sourceJar
sign javadocJar
}
artifacts {
shadow signShadowJar.signatures
shadow signSourceJar.signatures
shadow signJavadocJar.signatures
}

// Default the secretKeyRingFile to the current user's home
if (!project.property("signing.secretKeyRingFile")) {
def secretKeyPath = project.file("${System.getProperty("user.home")}/.gnupg/secring.gpg").absolutePath
project.setProperty("signing.secretKeyRingFile", secretKeyPath)
def secretKeyPath = project.file("${System.getProperty("user.home")}/.gnupg/secring.gpg").absolutePath
project.setProperty("signing.secretKeyRingFile", secretKeyPath)

signing {
sign publishing.publications
}
}

uploadArchives.enabled = false
uploadShadow {
publishing {
repositories {
mavenDeployer {
// Only configure publishing if a URL was provided
if (project.hasProperty("publishUrl")) {
if (publishUrl == "mavenCentral") {
repository(url: "https://oss.sonatype.org/service/local/staging/deploy/maven2/") {
authentication(userName: publishUsername, password: publishPassword)
}
snapshotRepository(url: "https://oss.sonatype.org/content/repositories/snapshots/") {
authentication(userName: publishUsername, password: publishPassword)
// Only configure publishing if a URL was provided
if (project.hasProperty("publishUrl")) {
if (publishUrl == "mavenCentral") {
maven {
credentials {
username = publishUsername
password = publishPassword
}
}
else if (publishUrl == "jcenterSnapshot") {
repository(url: "https://oss.jfrog.org/artifactory/oss-snapshot-local/") {
if (project.hasProperty("publishUsername") && project.hasProperty("publishPassword")) {
authentication(userName: publishUsername, password: publishPassword)
}
if (project.version.endsWith("-SNAPSHOT")) {
url = "https://oss.sonatype.org/content/repositories/snapshots/"
} else {
url = "https://oss.sonatype.org/service/local/staging/deploy/maven2/"
}
}
else {
repository(url: publishUrl) {
// Only configure credentials if they are provided (allows publishing to the filesystem)
if (project.hasProperty("publishUsername") && project.hasProperty("publishPassword")) {
authentication(userName: publishUsername, password: publishPassword)
}
else {
maven {
url = publishUrl
allowInsecureProtocol = true
// Only configure credentials if they are provided (allows publishing to the filesystem)
if (project.hasProperty("publishUsername") && project.hasProperty("publishPassword")) {
credentials {
username = publishUsername
password = publishPassword
}
}
}
}
}
}
}

task publishToRepo(dependsOn: uploadShadow) {
description = "Publish all artifacts to repository"
}
publications {
mavenJava(MavenPublication) {
artifactId = archivesBaseName

tasks.withType(Upload) {
repositories.withType(MavenResolver) {
pom.project {
name "Pravega Flink Connectors"
url "http://pravega.io"
description "Streaming Storage Platform"
scm {
url 'https://github.com/pravega/flink-connectors/tree/master'
connection 'scm:git:git://github.com/pravega/flink-connectors.git'
developerConnection 'scm:git:https://github.com/pravega/flink-connectors.git'
}
licenses {
license {
name 'The Apache License, Version 2.0'
url 'http://www.apache.org/licenses/LICENSE-2.0.txt'
}
}
developers {
developer {
id 'fpj'
name 'Flavio Junqueira'
}
developer {
id 'eronwright'
name 'Eron Wright'
}
}
}
// Variants from components could be retrieved from the `outgoingVariants` task
// For now, there are five of them:
// apiElements
// runtimeElements
// shadowRuntimeElements
// javadocElements
// sourcesElements
// So the publication will contain the `original`, unclassified (shadowed), `javadoc`, and `sources` jars
from components.java

pom.scopeMappings.mappings.remove(configurations.testCompile)
// Add test jar to the publish
// artifact testJar

pom.withXml { xml ->
// add the 'provided' dependencies based on the 'shadowOnly' configuration
def dependenciesNode = xml.asNode().appendNode('dependencies')
project.configurations.shadowOnly.allDependencies.each { dep ->
if (!(dep instanceof SelfResolvingDependency)) {
def dependencyNode = dependenciesNode.appendNode('dependency')
dependencyNode.appendNode('groupId', dep.group)
dependencyNode.appendNode('artifactId', dep.name)
dependencyNode.appendNode('version', dep.version)
dependencyNode.appendNode('scope', 'provided')
pom {
name = "Pravega Flink Connectors"
url = "http://pravega.io"
description = "Streaming Storage Platform"
scm {
url = 'https://github.com/pravega/flink-connectors/tree/master'
connection = 'scm:git:git://github.com/pravega/flink-connectors.git'
developerConnection = 'scm:git:https://github.com/pravega/flink-connectors.git'
}
licenses {
license {
name = 'The Apache License, Version 2.0'
url = 'http://www.apache.org/licenses/LICENSE-2.0.txt'
}
}
developers {
developer {
id = 'fpj'
name = 'Flavio Junqueira'
}
developer {
id = 'crazyzhou'
name = 'Yumin Zhou'
}
}
}
}

if (project.hasProperty('doSigning')) {
beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) }
}
}
}

// Configure install task to use the shadow jar/pom
install.configuration = configurations.shadow
MavenPom pom = install.repositories.mavenInstaller.pom
pom.scopeMappings.mappings.remove(configurations.compile)
pom.scopeMappings.mappings.remove(configurations.runtime)
pom.scopeMappings.addMapping(MavenPlugin.RUNTIME_PRIORITY, configurations.shadow, Conf2ScopeMappingContainer.RUNTIME)
// disable gradle module metadata file
// https://docs.gradle.org/current/userguide/publishing_gradle_module_metadata.html#sub:disabling-gmm-publication
tasks.withType(GenerateModuleMetadata) {
enabled = false
}
7 changes: 6 additions & 1 deletion gradle/mkdocs.gradle
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
/**
* Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
def gradleProject = project
if (gradleProject == gradleProject.rootProject) {
7 changes: 6 additions & 1 deletion gradle/spotbugs.gradle
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
/**
* Copyright (c) 2019 Dell / EMC Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


Binary file modified gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
2 changes: 1 addition & 1 deletion gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-6.5.1-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-7.0.2-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
10 changes: 8 additions & 2 deletions gradlew
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
#!/usr/bin/env sh
#
# Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
# Copyright Pravega Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
@@ -123,7 +129,7 @@ fi
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

JAVACMD=`cygpath --unix "$JAVACMD"`

# We build the pattern for arguments to be converted via cygpath
29 changes: 10 additions & 19 deletions gradlew.bat
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
@rem
@rem Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
@rem Copyright Pravega Authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem http://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@@ -34,7 +40,7 @@ if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
@@ -48,7 +54,7 @@ goto fail
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init
if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
@@ -58,29 +64,14 @@ echo location of your Java installation.

goto fail

:init
@rem Get command-line arguments, handling Windows variants

if not "%OS%" == "Windows_NT" goto win9xME_args

:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2

:win9xME_args_slurp
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
7 changes: 6 additions & 1 deletion settings.gradle
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
/**
* Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
pluginManagement {
it.resolutionStrategy {
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink;

import io.pravega.client.stream.Stream;
import io.pravega.client.stream.StreamCut;
import io.pravega.connectors.flink.util.StreamWithBoundaries;
import lombok.Data;
import org.apache.flink.annotation.Internal;
import org.apache.flink.util.Preconditions;

@@ -171,7 +176,6 @@ public boolean isMetricsEnabled() {
/**
* A Pravega stream with optional boundaries based on stream cuts.
*/
@Data
private static class StreamSpec implements Serializable {

private static final long serialVersionUID = 1L;
@@ -180,6 +184,12 @@ private static class StreamSpec implements Serializable {
private final StreamCut from;
private final StreamCut to;

private StreamSpec(String streamSpec, StreamCut from, StreamCut to) {
this.streamSpec = streamSpec;
this.from = from;
this.to = to;
}

public static StreamSpec of(String streamSpec, StreamCut from, StreamCut to) {
Preconditions.checkNotNull(streamSpec, "streamSpec");
Preconditions.checkNotNull(streamSpec, "from");
Original file line number Diff line number Diff line change
@@ -1,21 +1,28 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink;

import org.apache.flink.util.Preconditions;
import io.pravega.client.stream.ReaderGroupConfig;
import io.pravega.connectors.flink.util.FlinkPravegaUtils;
import io.pravega.connectors.flink.watermark.AssignerWithTimeWindows;
import org.apache.flink.annotation.Internal;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.util.Preconditions;
import org.apache.flink.util.SerializedValue;

import java.util.Optional;
@@ -25,7 +32,7 @@
/**
* An abstract streaming reader builder.
*
* The builder is abstracted to act as the base for both the {@link FlinkPravegaReader} and {@link FlinkPravegaTableSource} builders.
* The builder is abstracted to act as the base for the {@link FlinkPravegaReader} builder.
*
* @param <T> the element type.
* @param <B> the builder type.
@@ -146,9 +153,6 @@ public B withMaxOutstandingCheckpointRequest(int maxOutstandingCheckpointRequest
/**
* Builds a {@link FlinkPravegaReader} based on the configuration.
*
* Note that the {@link FlinkPravegaTableSource} supports both the batch and streaming API, and so creates both
* a source function and an input format and then uses one or the other.
*
* Be sure to call {@code initialize()} before returning the reader to user code.
*
* @throws IllegalStateException if the configuration is invalid.
@@ -197,7 +201,8 @@ public ReaderGroupInfo buildReaderGroupInfo() {
return new ReaderGroupInfo(rgConfig, rgScope, rgName);
}

private boolean isReaderGroupNameAutoGenerated(String readerGroupName) {
@VisibleForTesting
static boolean isReaderGroupNameAutoGenerated(String readerGroupName) {
String pattern = "^flink\\w{20}$";
Matcher matcher = Pattern.compile(pattern).matcher(readerGroupName);
return matcher.matches();
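For orientation, the builder that this class underpins is typically consumed as in the following minimal sketch; the scope, stream name, and schema are illustrative assumptions rather than part of this change.

import io.pravega.connectors.flink.FlinkPravegaReader;
import io.pravega.connectors.flink.PravegaConfig;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ReaderSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // "examples" and "mystream" are assumed names used only for illustration.
        PravegaConfig pravegaConfig = PravegaConfig.fromDefaults().withDefaultScope("examples");

        FlinkPravegaReader<String> reader = FlinkPravegaReader.<String>builder()
                .withPravegaConfig(pravegaConfig)
                .forStream("mystream")
                .withDeserializationSchema(new SimpleStringSchema())
                .build();

        env.addSource(reader).print();
        env.execute("read-from-pravega");
    }
}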
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink;

@@ -17,7 +23,7 @@
/**
* An abstract streaming writer builder.
*
* The builder is abstracted to act as the base for both the {@link FlinkPravegaWriter} and {@link FlinkPravegaTableSink} builders.
* The builder is abstracted to act as the base for the {@link FlinkPravegaWriter} builder.
*
* @param <T> the element type.
* @param <B> the builder type.
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink;

import io.pravega.client.stream.Stream;
import lombok.Data;
import org.apache.flink.annotation.Internal;
import org.apache.flink.util.Preconditions;

@@ -115,12 +120,15 @@ public boolean isMetricsEnabled() {
/**
* A Pravega stream.
*/
@Data
private static class StreamSpec implements Serializable {
private static final long serialVersionUID = 1L;

private final String streamSpec;

private StreamSpec(String streamSpec) {
this.streamSpec = streamSpec;
}

public static StreamSpec of(String streamSpec) {
Preconditions.checkNotNull(streamSpec, "streamSpec");
return new StreamSpec(streamSpec);
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink;

Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/**
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink;

import org.apache.flink.api.common.state.MapState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.ListTypeInfo;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

import java.util.ArrayList;
import java.util.List;

/**
* Orders elements into event-time order using managed timestamp state,
* buffering elements until the watermark passes their timestamps.
* The key type is String. (A usage sketch follows this class.)
*
* @param <T> The input type of the function.
*/
public class EventTimeOrderingFunction<T> extends KeyedProcessFunction<String, T, T> {

private static final long serialVersionUID = 1L;

private static final String EVENT_QUEUE_STATE_NAME = "eventQueue";

private static final String LAST_TRIGGERING_TS_STATE_NAME = "lastTriggeringTsState";

/**
* The input type information for buffering events to managed state.
*/
private final TypeInformation<T> typeInformation;

/**
* State to buffer all the data between watermarks.
*/
private transient MapState<Long, List<T>> dataState;

/**
* State to keep the last triggering timestamp. Used to filter late events.
*/
private transient ValueState<Long> lastTriggeringTsState;

public EventTimeOrderingFunction(TypeInformation<T> typeInformation) {
this.typeInformation = typeInformation;
}

@Override
public void open(Configuration config) throws Exception {
super.open(config);

// create a map-based queue to buffer input elements
MapStateDescriptor<Long, List<T>> elementQueueStateDescriptor = new MapStateDescriptor<>(
EVENT_QUEUE_STATE_NAME,
BasicTypeInfo.LONG_TYPE_INFO,
new ListTypeInfo<>(this.typeInformation)
);
dataState = getRuntimeContext().getMapState(elementQueueStateDescriptor);

// maintain a timestamp so anything before this time will be ignored
ValueStateDescriptor<Long> lastTriggeringTsDescriptor =
new ValueStateDescriptor<>(LAST_TRIGGERING_TS_STATE_NAME, Long.class);
lastTriggeringTsState = getRuntimeContext().getState(lastTriggeringTsDescriptor);
}

@Override
public void processElement(T element, Context ctx, Collector<T> out) throws Exception {
// timestamp of the processed element
Long timestamp = ctx.timestamp();
if (timestamp == null) {
// Simply forward the elements when the time characteristic of the program is set to ProcessingTime.
out.collect(element);
return;
}

// In event-time processing we assume correctness of the watermark.
// Events with timestamp smaller than (or equal to) the last seen watermark are considered late.
// FUTURE: emit late elements to a side output

Long lastTriggeringTs = lastTriggeringTsState.value();

// drop the element if it is late; otherwise buffer it for emission
if (lastTriggeringTs == null || timestamp > lastTriggeringTs) {
List<T> elementsForTimestamp = dataState.get(timestamp);

if (elementsForTimestamp == null) {
elementsForTimestamp = new ArrayList<>(1);

// register an event-time timer so the buffered list is emitted once the watermark passes this timestamp
ctx.timerService().registerEventTimeTimer(timestamp);
}

elementsForTimestamp.add(element);

dataState.put(timestamp, elementsForTimestamp);
}
}

@Override
public void onTimer(long timestamp, OnTimerContext ctx, Collector<T> out) throws Exception {
// get all elements buffered for the triggering timestamp
List<T> elements = dataState.get(timestamp);

if (elements != null) {
// emit elements in order
elements.forEach(out::collect);

// remove emitted elements from state
dataState.remove(timestamp);

// remember the last triggering event-time timestamp, used to filter late elements
lastTriggeringTsState.update(timestamp);
}
}
}
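A minimal sketch of how this function might be applied to a keyed stream follows; the in-memory source, key selector, tuple type, and out-of-orderness bound are assumptions made only for illustration.

import io.pravega.connectors.flink.EventTimeOrderingFunction;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.time.Duration;

public class OrderingSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // A tiny in-memory source of (key, timestamp) pairs, purely illustrative.
        DataStream<Tuple2<String, Long>> events = env
                .fromElements(Tuple2.of("sensor-1", 3L), Tuple2.of("sensor-1", 1L), Tuple2.of("sensor-1", 2L))
                .assignTimestampsAndWatermarks(
                        // tolerate out-of-order events; the final watermark at end of input flushes the buffer in order
                        WatermarkStrategy.<Tuple2<String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(5))
                                .withTimestampAssigner((event, previous) -> event.f1));

        events
                .keyBy(event -> event.f0)   // the function expects a String key
                .process(new EventTimeOrderingFunction<Tuple2<String, Long>>(
                        TypeInformation.of(new TypeHint<Tuple2<String, Long>>() { })))
                .print();

        env.execute("event-time-ordering-sketch");
    }
}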

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,37 +1,42 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.pravega.connectors.flink;

import io.pravega.connectors.flink.serialization.DeserializerFromSchemaRegistry;
import io.pravega.connectors.flink.serialization.PravegaDeserializationSchema;
import org.apache.flink.util.Preconditions;

import io.pravega.client.ClientConfig;
import io.pravega.client.BatchClientFactory;
import io.pravega.client.ClientConfig;
import io.pravega.client.batch.SegmentIterator;
import io.pravega.client.batch.SegmentRange;
import io.pravega.client.stream.Serializer;
import io.pravega.connectors.flink.serialization.DeserializerFromSchemaRegistry;
import io.pravega.connectors.flink.serialization.PravegaDeserializationSchema;
import io.pravega.connectors.flink.serialization.WrappingSerializer;
import io.pravega.connectors.flink.util.FlinkPravegaUtils;

import io.pravega.connectors.flink.util.StreamWithBoundaries;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.io.DefaultInputSplitAssigner;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.io.RichInputFormat;
import org.apache.flink.api.common.io.statistics.BaseStatistics;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.io.InputSplitAssigner;
import org.apache.flink.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
@@ -41,8 +46,8 @@
/**
* A Flink {@link InputFormat} that can be added as a source to read from Pravega in a Flink batch job.
*/
@Slf4j
public class FlinkPravegaInputFormat<T> extends RichInputFormat<T, PravegaInputSplit> {
private static final Logger LOG = LoggerFactory.getLogger(FlinkPravegaInputFormat.class);

private static final long serialVersionUID = 1L;

@@ -134,7 +139,7 @@ public PravegaInputSplit[] createInputSplits(int minNumSplits) throws IOExceptio
}
}

log.info("Prepared {} input splits", splits.size());
LOG.info("Prepared {} input splits", splits.size());
return splits.toArray(new PravegaInputSplit[splits.size()]);
}
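As context for the input splits prepared above, a batch read with this input format typically looks like the following sketch; the scope, stream name, and schema are illustrative assumptions.

import io.pravega.connectors.flink.FlinkPravegaInputFormat;
import io.pravega.connectors.flink.PravegaConfig;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class BatchReadSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        PravegaConfig pravegaConfig = PravegaConfig.fromDefaults().withDefaultScope("examples"); // assumed scope

        FlinkPravegaInputFormat<String> inputFormat = FlinkPravegaInputFormat.<String>builder()
                .withPravegaConfig(pravegaConfig)
                .forStream("mystream")                          // assumed stream name
                .withDeserializationSchema(new SimpleStringSchema())
                .build();

        // Each Pravega segment range becomes one input split processed in parallel.
        DataSet<String> events = env.createInput(inputFormat, BasicTypeInfo.STRING_TYPE_INFO);
        events.print();
    }
}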

Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.pravega.connectors.flink;
@@ -16,14 +22,15 @@
import io.pravega.client.stream.EventWriterConfig;
import io.pravega.client.stream.Serializer;
import io.pravega.client.stream.Stream;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.concurrent.CompletableFuture;
@@ -34,12 +41,10 @@

/**
* A Flink {@link OutputFormat} that can be added as a sink to write into Pravega. The current implementation does not
* support transactional writes. The {@link FlinkPravegaOutputFormat} is used in {@link FlinkPravegaTableSink}
* implementation to support writing to Pravega as part of batch {@link org.apache.flink.api.java.DataSet} operation.
* See io.pravega.connectors.flink.FlinkTableITCase for more details on how to use it.
* support transactional writes.
*/
@Slf4j
public class FlinkPravegaOutputFormat<T> extends RichOutputFormat<T> {
private static final Logger LOG = LoggerFactory.getLogger(FlinkPravegaOutputFormat.class);

private static final long serialVersionUID = 1L;

@@ -123,7 +128,7 @@ public void writeRecord(T record) throws IOException {
future.whenCompleteAsync(
(result, e) -> {
if (e != null) {
log.warn("Detected a write failure: {}", e);
LOG.warn("Detected a write failure: {}", e);

// We will record only the first error detected, since this will most likely help with
// finding the root cause. Storing all errors will not be feasible.
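For context on how this non-transactional sink is wired into a batch job, a minimal sketch follows; the scope, stream name, schema, and routing key are illustrative assumptions.

import io.pravega.connectors.flink.FlinkPravegaOutputFormat;
import io.pravega.connectors.flink.PravegaConfig;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class BatchWriteSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        PravegaConfig pravegaConfig = PravegaConfig.fromDefaults().withDefaultScope("examples"); // assumed scope

        FlinkPravegaOutputFormat<String> outputFormat = FlinkPravegaOutputFormat.<String>builder()
                .withPravegaConfig(pravegaConfig)
                .forStream("mystream")                          // assumed stream name
                .withSerializationSchema(new SimpleStringSchema())
                .withEventRouter(event -> "fixed-key")          // illustrative routing key
                .build();

        DataSet<String> data = env.fromElements("a", "b", "c");
        data.output(outputFormat);
        env.execute("write-to-pravega");
    }
}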
164 changes: 98 additions & 66 deletions src/main/java/io/pravega/connectors/flink/FlinkPravegaReader.java

Large diffs are not rendered by default.

This file was deleted.

This file was deleted.

145 changes: 0 additions & 145 deletions src/main/java/io/pravega/connectors/flink/FlinkPravegaTableSink.java

This file was deleted.

163 changes: 0 additions & 163 deletions src/main/java/io/pravega/connectors/flink/FlinkPravegaTableSource.java

This file was deleted.

71 changes: 61 additions & 10 deletions src/main/java/io/pravega/connectors/flink/FlinkPravegaWriter.java
Original file line number Diff line number Diff line change
@@ -1,27 +1,36 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import io.grpc.Status;
import io.grpc.StatusRuntimeException;
import io.pravega.client.ClientConfig;
import io.pravega.client.EventStreamClientFactory;
import io.pravega.client.stream.EventStreamWriter;
import io.pravega.client.stream.TransactionalEventStreamWriter;
import io.pravega.client.stream.EventWriterConfig;
import io.pravega.client.stream.Serializer;
import io.pravega.client.stream.Stream;
import io.pravega.client.stream.Transaction;
import io.pravega.client.stream.TransactionalEventStreamWriter;
import io.pravega.client.stream.TxnFailedException;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.annotation.Internal;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.serialization.RuntimeContextInitializationContextAdapters;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot;
import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton;
@@ -33,9 +42,10 @@
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.streaming.api.functions.sink.TwoPhaseCommitSinkFunction;
import org.apache.flink.streaming.api.operators.StreamingRuntimeContext;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.ByteBuffer;
@@ -52,9 +62,9 @@
*
* @param <T> The type of the event to be written.
*/
@Slf4j
public class FlinkPravegaWriter<T>
extends TwoPhaseCommitSinkFunction<T, FlinkPravegaWriter.PravegaTransactionState, Void> {
private static final Logger LOG = LoggerFactory.getLogger(FlinkPravegaWriter.class);

private static final long serialVersionUID = 1L;

@@ -148,6 +158,11 @@ protected FlinkPravegaWriter(
this.enableWatermark = enableWatermark;
this.enableMetrics = enableMetrics;
this.writerIdPrefix = UUID.randomUUID().toString();

if (writerMode == PravegaWriterMode.EXACTLY_ONCE) {
super.setTransactionTimeout(txnLeaseRenewalPeriod);
super.enableTransactionTimeoutWarnings(0.8);
}
}

/**
@@ -177,8 +192,10 @@ boolean getEnableWatermark() {

@Override
public void open(Configuration configuration) throws Exception {
serializationSchema.open(RuntimeContextInitializationContextAdapters.serializationAdapter(
getRuntimeContext(), metricGroup -> metricGroup.addGroup("user")));
initializeInternalWriter();
log.info("Initialized Pravega writer {} for stream: {} with controller URI: {}", writerId(), stream, clientConfig.getControllerURI());
LOG.info("Initialized Pravega writer {} for stream: {} with controller URI: {}", writerId(), stream, clientConfig.getControllerURI());
if (enableMetrics) {
registerMetrics();
}
@@ -217,7 +234,7 @@ protected void invoke(PravegaTransactionState transaction, T event, Context cont
future.whenCompleteAsync(
(result, e) -> {
if (e != null) {
log.warn("Detected a write failure", e);
LOG.warn("Detected a write failure", e);

// We will record only the first error detected, since this will most likely help with
// finding the root cause. Storing all errors will not be feasible.
@@ -271,23 +288,31 @@ protected void preCommit(PravegaTransactionState transaction) throws Exception {
protected void commit(PravegaTransactionState transaction) {
switch (writerMode) {
case EXACTLY_ONCE:
// This may come from a job recovery from a non-transactional writer.
if (transaction.transactionId == null) {
break;
}
@SuppressWarnings("unchecked")
final Transaction<T> txn = transaction.getTransaction() != null ? transaction.getTransaction() :
transactionalWriter.getTxn(UUID.fromString(transaction.transactionId));
final Transaction.Status status = txn.checkStatus();
try {
final Transaction.Status status = txn.checkStatus();
if (status == Transaction.Status.OPEN) {
if (enableWatermark && transaction.watermark != null) {
txn.commit(transaction.watermark);
} else {
txn.commit();
}
} else {
log.warn("{} - Transaction {} has unexpected transaction status {} while committing",
LOG.warn("{} - Transaction {} has unexpected transaction status {} while committing",
writerId(), txn.getTxnId(), status);
}
} catch (TxnFailedException e) {
log.error("{} - Transaction {} commit failed.", writerId(), txn.getTxnId());
LOG.error("{} - Transaction {} commit failed.", writerId(), txn.getTxnId());
} catch (StatusRuntimeException e) {
if (e.getStatus() == Status.NOT_FOUND) {
LOG.error("{} - Transaction {} not found.", writerId(), txn.getTxnId());
}
}
break;
case ATLEAST_ONCE:
@@ -308,6 +333,10 @@ protected void recoverAndCommit(PravegaTransactionState transaction) {
protected void abort(PravegaTransactionState transaction) {
switch (writerMode) {
case EXACTLY_ONCE:
// This may come from a job recovery from a non-transactional writer.
if (transaction.transactionId == null) {
break;
}
@SuppressWarnings("unchecked")
final Transaction<T> txn = transaction.getTransaction() != null ? transaction.getTransaction() :
transactionalWriter.getTxn(UUID.fromString(transaction.transactionId));
@@ -553,6 +582,13 @@ Transaction getTransaction() {
return transaction;
}

@Override
public String toString() {
return String.format(
"%s [transactionId=%s, watermark=%s]",
this.getClass().getSimpleName(), transactionId, watermark);
}

@Override
public boolean equals(Object o) {
if (this == o) {
@@ -679,6 +715,21 @@ public TransactionStateSerializerSnapshot() {
}
}

/**
* Disables the propagation of exceptions thrown when committing presumably timed out Pravega
* transactions during recovery of the job. If a Pravega transaction is timed out, a commit will
* never be successful. Hence, use this feature to avoid recovery loops of the Job. Exceptions
* will still be logged to inform the user that data loss might have occurred.
*
* <p>Note that we use {@link System#currentTimeMillis()} to track the age of a transaction.
* Moreover, only exceptions thrown during the recovery are caught, i.e., the writer will
* attempt at least one commit of the transaction before giving up.
*/
@Override
public FlinkPravegaWriter<T> ignoreFailuresAfterTransactionTimeout() {
super.ignoreFailuresAfterTransactionTimeout();
return this;
}
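For context on when this method matters, a sketch of constructing an exactly-once writer and opting into this behavior might look as follows; the scope, stream name, schema, and lease renewal period are illustrative assumptions.

import io.pravega.connectors.flink.FlinkPravegaWriter;
import io.pravega.connectors.flink.PravegaConfig;
import io.pravega.connectors.flink.PravegaWriterMode;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.time.Time;

public class ExactlyOnceWriterSketch {
    public static FlinkPravegaWriter<String> buildWriter() {
        PravegaConfig pravegaConfig = PravegaConfig.fromDefaults().withDefaultScope("examples"); // assumed scope

        FlinkPravegaWriter<String> writer = FlinkPravegaWriter.<String>builder()
                .withPravegaConfig(pravegaConfig)
                .forStream("mystream")                          // assumed stream name
                .withSerializationSchema(new SimpleStringSchema())
                .withWriterMode(PravegaWriterMode.EXACTLY_ONCE) // transactional commits on checkpoint
                .withTxnLeaseRenewalPeriod(Time.seconds(30))    // assumed transaction lease renewal period
                .build();

        // Avoid recovery loops when a recovered transaction has already timed out on the Pravega side.
        writer.ignoreFailuresAfterTransactionTimeout();
        return writer;
    }
}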

// ------------------------------------------------------------------------
// builder
64 changes: 64 additions & 0 deletions src/main/java/io/pravega/connectors/flink/PravegaCollector.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/**
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink;

import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.util.Collector;

import java.io.Serializable;
import java.util.ArrayDeque;
import java.util.Queue;

/**
* A Pravega collector that buffers the events produced when a single byte payload is deserialized into multiple events; a usage sketch follows this class.
*/
public class PravegaCollector<T> implements Collector<T>, Serializable {
private static final long serialVersionUID = 1L;

private final DeserializationSchema<T> deserializationSchema;

private boolean endOfStreamSignalled = false;

// internal buffer
private final Queue<T> records = new ArrayDeque<>();

public PravegaCollector(DeserializationSchema<T> deserializationSchema) {
this.deserializationSchema = deserializationSchema;
}

@Override
public void collect(T record) {
// do not emit subsequent elements once the end of the stream has been reached
if (endOfStreamSignalled || deserializationSchema.isEndOfStream(record)) {
endOfStreamSignalled = true;
return;
}
records.add(record);
}

public Queue<T> getRecords() {
return records;
}

public boolean isEndOfStreamSignalled() {
return endOfStreamSignalled;
}

@Override
public void close() {
// do nothing here
}
}
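To make the buffering contract concrete, here is a small standalone sketch of how a caller might push deserialized events through the collector and then drain its queue; the schema and payloads are illustrative, not the reader's actual code path.

import io.pravega.connectors.flink.PravegaCollector;
import org.apache.flink.api.common.serialization.SimpleStringSchema;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class CollectorSketch {
    public static void main(String[] args) throws IOException {
        SimpleStringSchema schema = new SimpleStringSchema();
        PravegaCollector<String> collector = new PravegaCollector<>(schema);

        // A schema may emit zero, one, or many events per byte payload into the collector.
        schema.deserialize("hello".getBytes(StandardCharsets.UTF_8), collector);
        schema.deserialize("world".getBytes(StandardCharsets.UTF_8), collector);

        // Drain the buffered events in arrival order.
        String next;
        while ((next = collector.getRecords().poll()) != null) {
            System.out.println(next);
        }
    }
}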
28 changes: 25 additions & 3 deletions src/main/java/io/pravega/connectors/flink/PravegaConfig.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink;

import io.pravega.client.ClientConfig;
import io.pravega.client.stream.Stream;
import io.pravega.shared.security.auth.Credentials;
import lombok.Data;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.util.Preconditions;

@@ -259,7 +264,6 @@ public PravegaConfig withHostnameValidation(boolean validateHostname) {
/**
* A configuration parameter resolvable via command-line parameters, system properties, or OS environment variables.
*/
@Data
static class PravegaParameter implements Serializable {

private static final long serialVersionUID = 1L;
@@ -268,6 +272,24 @@ static class PravegaParameter implements Serializable {
private final String propertyName;
private final String variableName;

PravegaParameter(String parameterName, String propertyName, String variableName) {
this.parameterName = parameterName;
this.propertyName = propertyName;
this.variableName = variableName;
}

public String getParameterName() {
return parameterName;
}

public String getPropertyName() {
return propertyName;
}

public String getVariableName() {
return variableName;
}

public Optional<String> resolve(ParameterTool parameters, Properties properties, Map<String, String> variables) {
if (parameters != null && parameters.has(parameterName)) {
return Optional.of(parameters.get(parameterName));
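To show how these parameters are typically supplied in practice, a small sketch of building a PravegaConfig follows; the scope, controller address, and the `--controller` argument name are assumptions used only for illustration.

import io.pravega.connectors.flink.PravegaConfig;
import org.apache.flink.api.java.utils.ParameterTool;

import java.net.URI;

public class ConfigSketch {
    public static void main(String[] args) {
        // Per the Javadoc above, each parameter is resolved from command-line arguments,
        // system properties, or environment variables.
        ParameterTool params = ParameterTool.fromArgs(args);    // e.g. --controller tcp://localhost:9090

        PravegaConfig config = PravegaConfig.fromParams(params)
                .withDefaultScope("examples")                   // assumed scope
                .withHostnameValidation(false);

        // Programmatic overrides are also possible when no external configuration is supplied.
        PravegaConfig explicit = PravegaConfig.fromDefaults()
                .withControllerURI(URI.create("tcp://localhost:9090"));

        System.out.println(config + " / " + explicit);
    }
}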
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink;

10 changes: 8 additions & 2 deletions src/main/java/io/pravega/connectors/flink/PravegaInputSplit.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.pravega.connectors.flink;

import org.apache.flink.util.Preconditions;
import io.pravega.client.batch.SegmentRange;
import org.apache.flink.core.io.InputSplit;
import org.apache.flink.util.Preconditions;

/**
* A {@link PravegaInputSplit} corresponds to a Pravega {@link SegmentRange}.
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink;

32 changes: 22 additions & 10 deletions src/main/java/io/pravega/connectors/flink/ReaderCheckpointHook.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink;

@@ -14,10 +20,12 @@
import io.pravega.client.stream.Checkpoint;
import io.pravega.client.stream.ReaderGroup;
import io.pravega.client.stream.ReaderGroupConfig;
import lombok.extern.slf4j.Slf4j;
import io.pravega.client.stream.ReaderGroupNotFoundException;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.core.io.SimpleVersionedSerializer;
import org.apache.flink.runtime.checkpoint.MasterTriggerRestoreHook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.concurrent.GuardedBy;
import java.util.concurrent.CompletableFuture;
@@ -33,8 +41,8 @@
* The hook executed in Flink's Checkpoint Coordinator that triggers and restores
* checkpoints on a Pravega ReaderGroup.
*/
@Slf4j
class ReaderCheckpointHook implements MasterTriggerRestoreHook<Checkpoint> {
private static final Logger LOG = LoggerFactory.getLogger(ReaderCheckpointHook.class);

/** The prefix of checkpoint names */
private static final String PRAVEGA_CHECKPOINT_NAME_PREFIX = "PVG-CHK-";
@@ -82,8 +90,12 @@ class ReaderCheckpointHook implements MasterTriggerRestoreHook<Checkpoint> {
// ------------------------------------------------------------------------
protected void initializeReaderGroup(String readerGroupName, String readerGroupScope, ClientConfig clientConfig) {
readerGroupManager = ReaderGroupManager.withScope(readerGroupScope, clientConfig);
readerGroupManager.createReaderGroup(readerGroupName, readerGroupConfig);
readerGroup = readerGroupManager.getReaderGroup(readerGroupName);
try {
readerGroup = readerGroupManager.getReaderGroup(readerGroupName);
} catch (ReaderGroupNotFoundException e) {
readerGroupManager.createReaderGroup(readerGroupName, readerGroupConfig);
readerGroup = readerGroupManager.getReaderGroup(readerGroupName);
}
}

@Override
@@ -127,22 +139,22 @@ public void restoreCheckpoint(long checkpointId, Checkpoint checkpoint) throws E
public void reset() {
// To avoid data loss, reset the reader group using the reader group config that was initially passed to the job.
// This can happen when job recovery occurs after a failure before any checkpoint has been taken.
log.info("resetting the reader group to initial state using the RG config {}", this.readerGroupConfig);
LOG.info("resetting the reader group to initial state using the RG config {}", this.readerGroupConfig);
this.readerGroup.resetReaderGroup(this.readerGroupConfig);
}

@Override
public void close() {
log.info("closing reader group Manager");
LOG.info("closing reader group Manager");
this.readerGroupManager.close();

// close the reader group properly
log.info("closing the reader group");
LOG.info("closing the reader group");
this.readerGroup.close();

synchronized (scheduledExecutorLock) {
if (scheduledExecutorService != null ) {
log.info("Closing Scheduled Executor for hook {}", hookUid);
LOG.info("Closing Scheduled Executor for hook {}", hookUid);
scheduledExecutorService.shutdownNow();
scheduledExecutorService = null;
}
@@ -161,7 +173,7 @@ public SimpleVersionedSerializer<Checkpoint> createCheckpointDataSerializer() {
private void ensureScheduledExecutorExists() {
synchronized (scheduledExecutorLock) {
if (scheduledExecutorService == null) {
log.info("Creating Scheduled Executor for hook {}", hookUid);
LOG.info("Creating Scheduled Executor for hook {}", hookUid);
scheduledExecutorService = createScheduledExecutorService();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/**
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink.dynamic.table;

import io.pravega.client.stream.EventRead;
import io.pravega.connectors.flink.dynamic.table.FlinkPravegaDynamicTableSource.ReadableMetadata;
import io.pravega.connectors.flink.serialization.PravegaDeserializationSchemaWithMetadata;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.util.Collector;

import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.List;

import static io.pravega.connectors.flink.util.FlinkPravegaUtils.byteBufferToArray;

/** A specific {@link PravegaDeserializationSchemaWithMetadata} for {@link FlinkPravegaDynamicTableSource}. */
public class FlinkPravegaDynamicDeserializationSchema extends PravegaDeserializationSchemaWithMetadata<RowData> {
private final TypeInformation<RowData> typeInfo;

private final DeserializationSchema<RowData> nestedSchema;

// the custom collector that adds metadata to the rows
private final OutputCollector outputCollector;

public FlinkPravegaDynamicDeserializationSchema(
TypeInformation<RowData> typeInfo,
int physicalArity,
List<String> metadataKeys,
DeserializationSchema<RowData> nestedSchema) {
this.typeInfo = typeInfo;
this.nestedSchema = nestedSchema;
this.outputCollector = new OutputCollector(metadataKeys, physicalArity);
}

@Override
public void open(InitializationContext context) throws Exception {
this.nestedSchema.open(context);
}

@Override
public RowData deserialize(byte[] message) throws IOException {
return this.nestedSchema.deserialize(message);
}

@Override
public void deserialize(byte[] message, Collector<RowData> out) throws IOException {
this.nestedSchema.deserialize(message, out);
}

@Override
public RowData deserialize(byte[] message, EventRead<ByteBuffer> eventRead) throws IOException {
throw new IllegalStateException("Please invoke FlinkPravegaDynamicDeserializationSchema#deserialize(byte[], EventRead<ByteBuffer>, Collector<RowData>) instead.");
}

@Override
public void deserialize(byte[] message, EventRead<ByteBuffer> eventRead, Collector<RowData> out) throws IOException {
this.outputCollector.eventRead = eventRead;
this.outputCollector.out = out;

this.deserialize(message, this.outputCollector);
}

@Override
public boolean isEndOfStream(RowData nextElement) {
return false;
}

@Override
public TypeInformation<RowData> getProducedType() {
return this.typeInfo;
}

private static final class OutputCollector implements Collector<RowData>, Serializable {
private static final long serialVersionUID = 1L;

// the downstream collector, which receives rows containing both physical and metadata fields
public transient Collector<RowData> out;

// where we get the event pointer from
public transient EventRead<ByteBuffer> eventRead;

// metadata keys carried by the row data; a subset of ReadableMetadata
private final List<String> metadataKeys;

// source datatype arity without metadata
private final int physicalArity;

private OutputCollector(List<String> metadataKeys, int physicalArity) {
this.metadataKeys = metadataKeys;
this.physicalArity = physicalArity;
}

@Override
public void collect(RowData record) {
if (this.metadataKeys.size() != 0 && record != null) {
record = enrichWithMetadata(record, eventRead);
}

out.collect(record);
}

@Override
public void close() {
// nothing to do
}

public RowData enrichWithMetadata(RowData rowData, EventRead<ByteBuffer> eventRead) {
// use GenericRowData to manipulate rowData's field
final GenericRowData producedRow = new GenericRowData(rowData.getRowKind(), physicalArity + metadataKeys.size());

// set the physical(original) field
final GenericRowData physicalRow = (GenericRowData) rowData;
int pos = 0;
for (; pos < physicalArity; pos++) {
producedRow.setField(pos, physicalRow.getField(pos));
}

// append the virtual (metadata) fields after the physical fields; unsupported keys are left unset
for (; pos < physicalArity + metadataKeys.size(); pos++) {
String metadataKey = metadataKeys.get(pos - physicalArity);
if (ReadableMetadata.EVENT_POINTER.key.equals(metadataKey)) {
producedRow.setField(pos, byteBufferToArray(eventRead.getEventPointer().toBytes()));
}
}

return producedRow;
}
}
}
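The enrichment above simply appends metadata columns after the physical columns; the following standalone sketch reproduces that layout with plain Flink row types, using made-up field names and values purely for illustration.

import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.StringData;

public class MetadataLayoutSketch {
    public static void main(String[] args) {
        int physicalArity = 2;
        byte[] eventPointerBytes = new byte[]{1, 2, 3};   // stand-in for the event pointer bytes

        // Physical row produced by the nested format, e.g. (user, score).
        GenericRowData physicalRow = new GenericRowData(physicalArity);
        physicalRow.setField(0, StringData.fromString("alice"));
        physicalRow.setField(1, 42L);

        // Produced row: physical fields first, then one metadata column (the event pointer).
        GenericRowData producedRow = new GenericRowData(physicalRow.getRowKind(), physicalArity + 1);
        for (int pos = 0; pos < physicalArity; pos++) {
            producedRow.setField(pos, physicalRow.getField(pos));
        }
        producedRow.setField(physicalArity, eventPointerBytes);

        System.out.println(producedRow);
    }
}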
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink.dynamic.table;

@@ -19,7 +25,6 @@
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.data.RowData;

import org.apache.flink.table.factories.DeserializationFormatFactory;
import org.apache.flink.table.factories.DynamicTableSinkFactory;
import org.apache.flink.table.factories.DynamicTableSourceFactory;
@@ -56,23 +61,23 @@ public DynamicTableSource createDynamicTableSource(Context context) {

// Validation
helper.validate();
PravegaOptions.validateTableSourceOptions(tableOptions);
PravegaOptionsUtil.validateTableSourceOptions(tableOptions);

DataType producedDataType = context.getCatalogTable().getSchema().toPhysicalRowDataType();

return new FlinkPravegaDynamicTableSource(
producedDataType,
decodingFormat,
PravegaOptions.getReaderGroupName(tableOptions),
PravegaOptions.getPravegaConfig(tableOptions),
PravegaOptions.resolveScanStreams(tableOptions),
PravegaOptions.getReaderGroupRefreshTimeMillis(tableOptions),
PravegaOptions.getCheckpointInitiateTimeoutMillis(tableOptions),
PravegaOptions.getEventReadTimeoutMillis(tableOptions),
PravegaOptions.getMaxOutstandingCheckpointRequest(tableOptions),
PravegaOptions.getUid(tableOptions),
PravegaOptions.isStreamingReader(tableOptions),
PravegaOptions.isBoundedRead(tableOptions));
PravegaOptionsUtil.getReaderGroupName(tableOptions),
PravegaOptionsUtil.getPravegaConfig(tableOptions),
PravegaOptionsUtil.resolveScanStreams(tableOptions),
PravegaOptionsUtil.getReaderGroupRefreshTimeMillis(tableOptions),
PravegaOptionsUtil.getCheckpointInitiateTimeoutMillis(tableOptions),
PravegaOptionsUtil.getEventReadTimeoutMillis(tableOptions),
PravegaOptionsUtil.getMaxOutstandingCheckpointRequest(tableOptions),
PravegaOptionsUtil.getUid(tableOptions),
PravegaOptionsUtil.isStreamingReader(tableOptions),
PravegaOptionsUtil.isBoundedRead(tableOptions));
}

@Override
@@ -87,26 +92,26 @@ public DynamicTableSink createDynamicTableSink(Context context) {

// Validation
helper.validate();
PravegaOptions.validateTableSinkOptions(tableOptions);
PravegaOptionsUtil.validateTableSinkOptions(tableOptions);

TableSchema tableSchema = TableSchemaUtils.getPhysicalSchema(context.getCatalogTable().getSchema());

return new FlinkPravegaDynamicTableSink(
tableSchema,
encodingFormat,
PravegaOptions.getPravegaConfig(tableOptions),
PravegaOptions.getSinkStream(tableOptions),
PravegaOptions.getWriterMode(tableOptions),
PravegaOptions.getTransactionLeaseRenewalIntervalMillis(tableOptions),
PravegaOptions.isWatermarkPropagationEnabled(tableOptions),
PravegaOptions.getRoutingKeyField(tableOptions));
PravegaOptionsUtil.getPravegaConfig(tableOptions),
PravegaOptionsUtil.getSinkStream(tableOptions),
PravegaOptionsUtil.getWriterMode(tableOptions),
PravegaOptionsUtil.getTransactionLeaseRenewalIntervalMillis(tableOptions),
PravegaOptionsUtil.isWatermarkPropagationEnabled(tableOptions),
PravegaOptionsUtil.getRoutingKeyField(tableOptions));
}

@Override
public Set<ConfigOption<?>> requiredOptions() {
final Set<ConfigOption<?>> options = new HashSet<>();
options.add(FactoryUtil.FORMAT);
options.add(CONTROLLER_URL);
options.add(CONTROLLER_URI);
options.add(SCOPE);
return options;
}
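For context on how this factory is reached from user code, a Table API sketch follows; the option keys shown ('controller-uri', 'scope', 'scan.streams', 'format') follow the connector documentation but should be treated as assumptions here, and the table name, stream, and addresses are made up.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class TableFactorySketch {
    public static void main(String[] args) {
        TableEnvironment tEnv = TableEnvironment.create(
                EnvironmentSettings.newInstance().inStreamingMode().build());

        // 'connector' = 'pravega' selects this factory; the remaining keys are assumed option names.
        tEnv.executeSql(
                "CREATE TABLE user_events (\n" +
                "  user_name STRING,\n" +
                "  score BIGINT\n" +
                ") WITH (\n" +
                "  'connector' = 'pravega',\n" +
                "  'controller-uri' = 'tcp://localhost:9090',\n" +
                "  'scope' = 'examples',\n" +
                "  'scan.streams' = 'user-events',\n" +
                "  'format' = 'json'\n" +
                ")");
    }
}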
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink.dynamic.table;

@@ -16,19 +22,19 @@
import io.pravega.connectors.flink.PravegaWriterMode;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.format.EncodingFormat;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.sink.SinkFunctionProvider;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.LogicalTypeRoot;
import org.apache.flink.util.Preconditions;

import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.Objects;
import java.util.Optional;

import static org.apache.flink.util.Preconditions.checkArgument;

@@ -56,7 +62,8 @@ public class FlinkPravegaDynamicTableSink implements DynamicTableSink {
private final boolean enableWatermarkPropagation;

// Pravega routing key field name
private final Optional<String> routingKeyFieldName;
@Nullable
private final String routingKeyFieldName;

/**
* Creates a Pravega {@link DynamicTableSink}.
@@ -80,7 +87,7 @@ public FlinkPravegaDynamicTableSink(TableSchema tableSchema,
PravegaWriterMode writerMode,
long txnLeaseRenewalIntervalMillis,
boolean enableWatermarkPropagation,
Optional<String> routingKeyFieldName) {
@Nullable String routingKeyFieldName) {
this.tableSchema = Preconditions.checkNotNull(tableSchema, "Table schema must not be null.");
this.encodingFormat = Preconditions.checkNotNull(encodingFormat, "Encoding format must not be null.");
this.pravegaConfig = Preconditions.checkNotNull(pravegaConfig, "Pravega config must not be null.");
@@ -106,9 +113,10 @@ public SinkRuntimeProvider getSinkRuntimeProvider(Context context) {
.enableWatermark(enableWatermarkPropagation)
.withTxnLeaseRenewalPeriod(Time.milliseconds(txnLeaseRenewalIntervalMillis));

routingKeyFieldName.ifPresent(name -> {
writerBuilder.withEventRouter(new RowDataBasedRouter(name, tableSchema));
});
if (routingKeyFieldName != null) {
writerBuilder.withEventRouter(new RowDataBasedRouter(routingKeyFieldName, tableSchema));
}

return SinkFunctionProvider.of(writerBuilder.build());
}

@@ -171,13 +179,17 @@ public static class RowDataBasedRouter implements PravegaEventRouter<RowData> {

public RowDataBasedRouter(String routingKeyFieldName, TableSchema tableSchema) {
String[] fieldNames = tableSchema.getFieldNames();
DataType[] fieldTypes = tableSchema.getFieldDataTypes();

int keyIndex = Arrays.asList(fieldNames).indexOf(routingKeyFieldName);

checkArgument(keyIndex >= 0,
"Key field '" + routingKeyFieldName + "' not found");
checkArgument(DataTypes.STRING().equals(fieldTypes[keyIndex]),
"Key field must be of type 'STRING'");

DataType[] fieldTypes = tableSchema.getFieldDataTypes();
LogicalTypeRoot logicalTypeRoot = fieldTypes[keyIndex].getLogicalType().getTypeRoot();

checkArgument(LogicalTypeRoot.CHAR == logicalTypeRoot || LogicalTypeRoot.VARCHAR == logicalTypeRoot,
"Key field must be of string type");

this.keyIndex = keyIndex;
}

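The constructor above only resolves and validates keyIndex; the key extraction itself is outside this hunk. A plausible standalone sketch of the runtime behaviour, assuming the usual PravegaEventRouter contract:

import io.pravega.connectors.flink.PravegaEventRouter;
import org.apache.flink.table.data.RowData;

// Hypothetical router equivalent to what RowDataBasedRouter does at runtime.
public class StringFieldRouterSketch implements PravegaEventRouter<RowData> {
    private final int keyIndex;

    public StringFieldRouterSketch(int keyIndex) {
        this.keyIndex = keyIndex;
    }

    @Override
    public String getRoutingKey(RowData event) {
        // The index is assumed to point at a CHAR/VARCHAR column, as validated above,
        // so the value can be read as StringData and used as the Pravega routing key.
        return event.getString(keyIndex).toString();
    }
}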
@@ -1,11 +1,17 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink.dynamic.table;

@@ -15,29 +21,46 @@
import io.pravega.connectors.flink.util.StreamWithBoundaries;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.format.DecodingFormat;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.InputFormatProvider;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.SourceFunctionProvider;
import org.apache.flink.table.connector.source.abilities.SupportsReadingMetadata;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.types.DataType;
import org.apache.flink.util.Preconditions;

import javax.annotation.Nullable;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class FlinkPravegaDynamicTableSource implements ScanTableSource {
public class FlinkPravegaDynamicTableSource implements ScanTableSource, SupportsReadingMetadata {

private static final String FORMAT_METADATA_PREFIX = "from_format.";

// Source produced data type
private final DataType producedDataType;
protected DataType producedDataType;

// Data type to configure the format
private final DataType physicalDataType;

// Metadata that is appended at the end of a physical source row
private List<String> metadataKeys;

// Scan format for decoding records from Pravega
private final DecodingFormat<DeserializationSchema<RowData>> decodingFormat;

// The reader group name to coordinate the parallel readers. This should be unique for a Flink job.
@Nullable
private final String readerGroupName;

// Pravega connection configuration
@@ -59,7 +82,8 @@ public class FlinkPravegaDynamicTableSource implements ScanTableSource {
private final int maxOutstandingCheckpointRequest;

// Uid of the table source to identify the checkpoint state
private final Optional<String> uid;
@Nullable
private final String uid;

// Flag to determine streaming or batch read
private final boolean isStreamingReader;
@@ -69,7 +93,7 @@ public class FlinkPravegaDynamicTableSource implements ScanTableSource {

/**
* Creates a Pravega {@link DynamicTableSource}.
* @param producedDataType source produced data type
* @param physicalDataType physical data type to configure the format
* @param decodingFormat scan format for decoding records from Pravega
* @param readerGroupName the reader group name
* @param pravegaConfig Pravega connection configuration
@@ -82,7 +106,7 @@ public class FlinkPravegaDynamicTableSource implements ScanTableSource {
* @param isStreamingReader flag to determine streaming or batch read
* @param isBounded flag to determine if the source stream is bounded
*/
public FlinkPravegaDynamicTableSource(DataType producedDataType,
public FlinkPravegaDynamicTableSource(DataType physicalDataType,
DecodingFormat<DeserializationSchema<RowData>> decodingFormat,
String readerGroupName,
PravegaConfig pravegaConfig,
@@ -91,15 +115,52 @@ public FlinkPravegaDynamicTableSource(DataType producedDataType,
long checkpointInitiateTimeoutMillis,
long eventReadTimeoutMillis,
int maxOutstandingCheckpointRequest,
Optional<String> uid,
String uid,
boolean isStreamingReader,
boolean isBounded) {
this(
physicalDataType,
// producedDataType should be the same as physicalDataType on initialization
// and will be updated on `applyReadableMetadata`
physicalDataType,
// metadataKeys will be empty on initialization and will be updated on `applyReadableMetadata`
Collections.emptyList(),
decodingFormat,
readerGroupName,
pravegaConfig,
streams,
readerGroupRefreshTimeMillis,
checkpointInitiateTimeoutMillis,
eventReadTimeoutMillis,
maxOutstandingCheckpointRequest,
uid,
isStreamingReader,
isBounded
);
}

FlinkPravegaDynamicTableSource(DataType physicalDataType,
DataType producedDataType,
List<String> metadataKeys,
DecodingFormat<DeserializationSchema<RowData>> decodingFormat,
String readerGroupName,
PravegaConfig pravegaConfig,
List<StreamWithBoundaries> streams,
long readerGroupRefreshTimeMillis,
long checkpointInitiateTimeoutMillis,
long eventReadTimeoutMillis,
int maxOutstandingCheckpointRequest,
String uid,
boolean isStreamingReader,
boolean isBounded) {
this.physicalDataType = Preconditions.checkNotNull(
physicalDataType, "Physical data type must not be null.");
this.producedDataType = Preconditions.checkNotNull(
producedDataType, "Produced data type must not be null.");
this.decodingFormat = Preconditions.checkNotNull(
decodingFormat, "Decoding format must not be null.");
Preconditions.checkArgument(!isStreamingReader || readerGroupName != null,
"Reader group name is required in streaming mode");
this.metadataKeys = Preconditions.checkNotNull(
metadataKeys, "Metadata Keys must not be null.");
this.readerGroupName = readerGroupName;
this.pravegaConfig = Preconditions.checkNotNull(
pravegaConfig, "Pravega config must not be null.");
@@ -121,27 +182,36 @@ public ChangelogMode getChangelogMode() {

@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) {
// create a PravegaDeserializationSchema that will expose metadata to the row
final FlinkPravegaDynamicDeserializationSchema deserializationSchema
= new FlinkPravegaDynamicDeserializationSchema(
runtimeProviderContext.createTypeInformation(producedDataType),
producedDataType.getChildren().size() - metadataKeys.size(),
metadataKeys,
decodingFormat.createRuntimeDecoder(runtimeProviderContext, physicalDataType));

if (isStreamingReader) {
FlinkPravegaReader.Builder<RowData> readerBuilder = FlinkPravegaReader.<RowData>builder()
.withPravegaConfig(pravegaConfig)
.withReaderGroupName(readerGroupName)
.withDeserializationSchema(decodingFormat.createRuntimeDecoder(runtimeProviderContext, producedDataType))
.withDeserializationSchema(deserializationSchema)
.withReaderGroupRefreshTime(Time.milliseconds(readerGroupRefreshTimeMillis))
.withCheckpointInitiateTimeout(Time.milliseconds(checkpointInitiateTimeoutMillis))
.withEventReadTimeout(Time.milliseconds(eventReadTimeoutMillis))
.withMaxOutstandingCheckpointRequest(maxOutstandingCheckpointRequest);
Optional.ofNullable(readerGroupName).ifPresent(readerBuilder::withReaderGroupName);

for (StreamWithBoundaries stream : streams) {
readerBuilder.forStream(stream.getStream(), stream.getFrom(), stream.getTo());
}

String generatedUid = readerBuilder.generateUid();
readerBuilder.uid(uid.orElse(generatedUid));
readerBuilder.uid(uid == null ? readerBuilder.generateUid() : uid);

return SourceFunctionProvider.of(readerBuilder.build(), isBounded);
} else {
FlinkPravegaInputFormat.Builder<RowData> inputFormatBuilder = FlinkPravegaInputFormat.<RowData>builder()
.withPravegaConfig(pravegaConfig)
.withDeserializationSchema(decodingFormat.createRuntimeDecoder(runtimeProviderContext, producedDataType));
FlinkPravegaInputFormat.Builder<RowData> inputFormatBuilder =
FlinkPravegaInputFormat.<RowData>builder()
.withPravegaConfig(pravegaConfig)
.withDeserializationSchema(deserializationSchema);

for (StreamWithBoundaries stream : streams) {
inputFormatBuilder.forStream(stream.getStream(), stream.getFrom(), stream.getTo());
@@ -154,7 +224,9 @@ public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderCon
@Override
public DynamicTableSource copy() {
return new FlinkPravegaDynamicTableSource(
this.physicalDataType,
this.producedDataType,
this.metadataKeys,
this.decodingFormat,
this.readerGroupName,
this.pravegaConfig,
@@ -189,18 +261,22 @@ public boolean equals(Object o) {
isStreamingReader == that.isStreamingReader &&
isBounded == that.isBounded &&
producedDataType.equals(that.producedDataType) &&
physicalDataType.equals(that.physicalDataType) &&
decodingFormat.equals(that.decodingFormat) &&
readerGroupName.equals(that.readerGroupName) &&
metadataKeys.equals(that.metadataKeys) &&
Objects.equals(readerGroupName, that.readerGroupName) &&
pravegaConfig.equals(that.pravegaConfig) &&
streams.equals(that.streams) &&
uid.equals(that.uid);
Objects.equals(uid, that.uid);
}

@Override
public int hashCode() {
return Objects.hash(
producedDataType,
physicalDataType,
decodingFormat,
metadataKeys,
readerGroupName,
pravegaConfig,
streams,
@@ -212,4 +288,66 @@ public int hashCode() {
isStreamingReader,
isBounded);
}

@Override
public Map<String, DataType> listReadableMetadata() {
final Map<String, DataType> metadataMap = new LinkedHashMap<>();

// according to convention, the order of the final row must be
// PHYSICAL + FORMAT METADATA + CONNECTOR METADATA
// where the format metadata has highest precedence

// add value format metadata with prefix
this.decodingFormat
.listReadableMetadata()
.forEach((key, value) -> metadataMap.put(FORMAT_METADATA_PREFIX + key, value));

// add connector metadata
Stream.of(ReadableMetadata.values())
.forEachOrdered(m -> metadataMap.put(m.key, m.dataType));

return metadataMap;
}

@Override
public void applyReadableMetadata(List<String> metadataKeys, DataType producedDataType) {
// separate connector and format metadata
Map<Boolean, List<String>> partitions = metadataKeys
.stream()
.collect(Collectors.partitioningBy(key -> key.startsWith(FORMAT_METADATA_PREFIX)));

// push down format metadata
final Map<String, DataType> formatMetadata = this.decodingFormat.listReadableMetadata();
if (formatMetadata.size() > 0) {
this.decodingFormat
.applyReadableMetadata(partitions.get(true)
.stream()
.map(k -> k.substring(FORMAT_METADATA_PREFIX.length()))
.collect(Collectors.toList()));
}

this.metadataKeys = partitions.get(false);
this.producedDataType = producedDataType;
}

enum ReadableMetadata {
EVENT_POINTER(
"event_pointer",
DataTypes.BYTES().notNull()
);

final String key;

final DataType dataType;

ReadableMetadata(String key, DataType dataType) {
this.key = key;
this.dataType = dataType;
}
}

@Override
public boolean supportsMetadataProjection() {
return false;
}
}
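For context, a hedged sketch of consuming the new metadata support from SQL (not part of this change). The 'event_pointer' key and its BYTES NOT NULL type come from ReadableMetadata above, and format metadata would be addressed through the 'from_format.' prefix; the connector identifier and the remaining WITH keys are illustrative assumptions.

import org.apache.flink.table.api.TableEnvironment;

public final class EventPointerMetadataSketch {
    // Hypothetical helper: declares the connector metadata column exposed by listReadableMetadata().
    public static void register(TableEnvironment tEnv) {
        tEnv.executeSql(
                "CREATE TABLE events (" +
                "  msg STRING," +
                // declared as nullable BYTES here; the connector exposes it as BYTES NOT NULL
                "  event_pointer BYTES METADATA VIRTUAL" +
                ") WITH (" +
                "  'connector' = 'pravega'," +
                "  'controller-uri' = 'tcp://localhost:9090'," +
                "  'scope' = 'examples'," +
                "  'scan.streams' = 'stream1'," +
                "  'format' = 'json'" +
                ")");
    }
}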
@@ -1,44 +1,32 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink.dynamic.table;

import io.pravega.client.stream.Stream;
import io.pravega.client.stream.StreamCut;
import io.pravega.connectors.flink.PravegaConfig;
import io.pravega.connectors.flink.PravegaWriterMode;
import io.pravega.connectors.flink.util.FlinkPravegaUtils;
import io.pravega.connectors.flink.util.StreamWithBoundaries;
import io.pravega.shared.NameUtils;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigOptions;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.table.api.TableException;
import org.apache.flink.table.api.ValidationException;

import java.net.URI;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;

import static io.pravega.connectors.flink.util.FlinkPravegaUtils.isCredentialsLoadDynamic;

public class PravegaOptions {
// --------------------------------------------------------------------------------------------
// Connection specific options
// --------------------------------------------------------------------------------------------

public static final ConfigOption<String> CONTROLLER_URL = ConfigOptions
public static final ConfigOption<String> CONTROLLER_URI = ConfigOptions
.key("controller-uri")
.stringType()
.noDefaultValue()
@@ -177,218 +165,5 @@ public class PravegaOptions {
.noDefaultValue()
.withDescription("Optional field name to use as a Pravega event routing key, field type must be STRING, random routing if not specified");

// --------------------------------------------------------------------------------------------
// Option enumerations
// --------------------------------------------------------------------------------------------

public static final String SCAN_EXECUTION_TYPE_VALUE_STREAMING = "streaming";
public static final String SCAN_EXECUTION_TYPE_VALUE_BATCH = "batch";

public static final String SINK_SEMANTIC_VALUE_EXACTLY_ONCE = "exactly-once";
public static final String SINK_SEMANTIC_VALUE_AT_LEAST_ONCE = "at-least-once";
public static final String SINK_SEMANTIC_VALUE_BEST_EFFORT = "best-effort";

private static final Set<String> SCAN_EXECUTION_TYPE_ENUMS = new HashSet<>(Arrays.asList(
SCAN_EXECUTION_TYPE_VALUE_STREAMING,
SCAN_EXECUTION_TYPE_VALUE_BATCH
));

private static final Set<String> SINK_SEMANTIC_ENUMS = new HashSet<>(Arrays.asList(
SINK_SEMANTIC_VALUE_AT_LEAST_ONCE,
SINK_SEMANTIC_VALUE_EXACTLY_ONCE,
SINK_SEMANTIC_VALUE_BEST_EFFORT
));

private PravegaOptions() {}

// --------------------------------------------------------------------------------------------
// Validation
// --------------------------------------------------------------------------------------------

public static void validateTableSourceOptions(ReadableConfig tableOptions) {
validateScanExecutionType(tableOptions);
validateSourceStreams(tableOptions);
if (tableOptions.get(SCAN_EXECUTION_TYPE).equals(SCAN_EXECUTION_TYPE_VALUE_STREAMING)) {
validateReaderGroup(tableOptions);
}
}

public static void validateTableSinkOptions(ReadableConfig tableOptions) {
validateSinkStream(tableOptions);
validateSinkSemantic(tableOptions);
}

private static void validateScanExecutionType(ReadableConfig tableOptions) {
tableOptions.getOptional(SCAN_EXECUTION_TYPE).ifPresent(type -> {
if (!SCAN_EXECUTION_TYPE_ENUMS.contains(type)) {
throw new ValidationException(
String.format("Unsupported value '%s' for '%s'. Supported values are ['streaming', 'batch'].",
type, SCAN_EXECUTION_TYPE.key()));
}
});
}

private static void validateSourceStreams(ReadableConfig tableOptions) {
List<String> streams = tableOptions.getOptional(SCAN_STREAMS)
.orElseThrow(() -> new ValidationException(String.format("'%s' is required but missing", SCAN_STREAMS.key())));

streams.forEach(NameUtils::validateStreamName);

tableOptions.getOptional(SCAN_START_STREAMCUTS).ifPresent(streamCuts -> {
if (streamCuts.size() != streams.size()) {
throw new ValidationException(
String.format("Start stream cuts are not matching the number of streams, having %d, expected %d",
streamCuts.size(), streams.size()));
}
});

tableOptions.getOptional(SCAN_END_STREAMCUTS).ifPresent(streamCuts -> {
if (streamCuts.size() != streams.size()) {
throw new ValidationException(
String.format("End stream cuts are not matching the number of streams, having %d, expected %d",
streamCuts.size(), streams.size()));
}
});
}

private static void validateReaderGroup(ReadableConfig tableOptions) {
Optional<String> readerGroupName = tableOptions.getOptional(SCAN_READER_GROUP_NAME);
if (!readerGroupName.isPresent()) {
throw new ValidationException(String.format("'%s' is required but missing", SCAN_READER_GROUP_NAME.key()));
} else {
NameUtils.validateReaderGroupName(readerGroupName.get());
}

tableOptions.getOptional(SCAN_READER_GROUP_MAX_OUTSTANDING_CHECKPOINT_REQUEST).ifPresent(num -> {
if (num < 1) {
throw new ValidationException(String.format("'%s' requires a positive integer, received %d",
SCAN_READER_GROUP_MAX_OUTSTANDING_CHECKPOINT_REQUEST.key(), num));
}
});
}

private static void validateSinkSemantic(ReadableConfig tableOptions) {
tableOptions.getOptional(SINK_SEMANTIC).ifPresent(semantic -> {
if (!SINK_SEMANTIC_ENUMS.contains(semantic)) {
throw new ValidationException(
String.format("Unsupported value '%s' for '%s'. Supported values are ['at-least-once', 'exactly-once', 'best-effort'].",
semantic, SINK_SEMANTIC.key()));
}
});
}

private static void validateSinkStream(ReadableConfig tableOptions) {
String stream = tableOptions.getOptional(SINK_STREAM)
.orElseThrow(() -> new ValidationException(String.format("'%s' is required but missing", SINK_STREAM.key())));
NameUtils.validateStreamName(stream);
}

// --------------------------------------------------------------------------------------------
// Utilities
// --------------------------------------------------------------------------------------------

// ------------------------------------- Common ----------------------------------------

public static PravegaConfig getPravegaConfig(ReadableConfig tableOptions) {
PravegaConfig pravegaConfig = PravegaConfig.fromDefaults()
.withControllerURI(URI.create(tableOptions.get(CONTROLLER_URL)))
.withDefaultScope(tableOptions.get(SCOPE))
.withHostnameValidation(tableOptions.get(SECURITY_VALIDATE_HOSTNAME))
.withTrustStore(tableOptions.get(SECURITY_TRUST_STORE));

Optional<String> authType = tableOptions.getOptional(SECURITY_AUTH_TYPE);
Optional<String> authToken = tableOptions.getOptional(SECURITY_AUTH_TOKEN);
if (authType.isPresent() && authToken.isPresent() && !isCredentialsLoadDynamic()) {
pravegaConfig.withCredentials(new FlinkPravegaUtils.SimpleCredentials(authType.get(), authToken.get()));
}

return pravegaConfig;
}

// ------------------------------------- Reader ----------------------------------------

public static boolean isStreamingReader(ReadableConfig tableOptions) {
return tableOptions.get(SCAN_EXECUTION_TYPE).equals(SCAN_EXECUTION_TYPE_VALUE_STREAMING);
}

public static String getReaderGroupName(ReadableConfig tableOptions) {
return tableOptions.get(SCAN_READER_GROUP_NAME);
}

public static Optional<String> getUid(ReadableConfig tableOptions) {
return tableOptions.getOptional(SCAN_UID);
}

public static long getReaderGroupRefreshTimeMillis(ReadableConfig tableOptions) {
return tableOptions.get(SCAN_READER_GROUP_REFRESH_INTERVAL).toMillis();
}

public static long getCheckpointInitiateTimeoutMillis(ReadableConfig tableOptions) {
return tableOptions.get(SCAN_READER_GROUP_CHECKPOINT_INITIATE_TIMEOUT_INTERVAL).toMillis();
}

public static long getEventReadTimeoutMillis(ReadableConfig tableOptions) {
return tableOptions.get(SCAN_EVENT_READ_TIMEOUT_INTERVAL).toMillis();
}

public static int getMaxOutstandingCheckpointRequest(ReadableConfig tableOptions) {
return tableOptions.get(SCAN_READER_GROUP_MAX_OUTSTANDING_CHECKPOINT_REQUEST);
}

public static boolean isBoundedRead(ReadableConfig tableOptions) {
Optional<List<String>> endStreamCuts = tableOptions.getOptional(SCAN_END_STREAMCUTS);
return endStreamCuts.isPresent() &&
endStreamCuts.get().stream().noneMatch(cut -> cut.equals(StreamCut.UNBOUNDED.asText()));
}

public static List<StreamWithBoundaries> resolveScanStreams(ReadableConfig tableOptions) {
String scope = tableOptions.get(SCOPE);
List<String> streams = tableOptions.getOptional(SCAN_STREAMS)
.orElseThrow(() -> new TableException("Validator should have checked that"));
List<String> startStreamCuts = tableOptions.get(SCAN_START_STREAMCUTS);
List<String> endStreamCuts = tableOptions.get(SCAN_END_STREAMCUTS);
List<StreamWithBoundaries> result = new ArrayList<>();

for (int i = 0; i < streams.size(); i++) {
Stream stream = Stream.of(scope, streams.get(i));
StreamCut from = startStreamCuts == null ? StreamCut.UNBOUNDED : StreamCut.from(startStreamCuts.get(i));
StreamCut to = endStreamCuts == null ? StreamCut.UNBOUNDED : StreamCut.from(endStreamCuts.get(i));
result.add(new StreamWithBoundaries(stream, from, to));
}

return result;
}

// ------------------------------------- Writer ----------------------------------------

public static Stream getSinkStream(ReadableConfig tableOptions) {
String scope = tableOptions.get(SCOPE);
String stream = tableOptions.get(SINK_STREAM);
return Stream.of(scope, stream);
}

public static PravegaWriterMode getWriterMode(ReadableConfig tableOptions) {
switch (tableOptions.get(SINK_SEMANTIC)) {
case SINK_SEMANTIC_VALUE_EXACTLY_ONCE:
return PravegaWriterMode.EXACTLY_ONCE;
case SINK_SEMANTIC_VALUE_AT_LEAST_ONCE:
return PravegaWriterMode.ATLEAST_ONCE;
case SINK_SEMANTIC_VALUE_BEST_EFFORT:
return PravegaWriterMode.BEST_EFFORT;
default:
throw new TableException("Validator should have checked that");
}
}

public static long getTransactionLeaseRenewalIntervalMillis(ReadableConfig tableOptions) {
return tableOptions.get(SINK_TXN_LEASE_RENEWAL_INTERVAL).toMillis();
}

public static boolean isWatermarkPropagationEnabled(ReadableConfig tableOptions) {
return tableOptions.get(SINK_ENABLE_WATERMARK_PROPAGATION);
}

public static Optional<String> getRoutingKeyField(ReadableConfig tableOptions) {
return tableOptions.getOptional(SINK_ROUTINGKEY_FIELD_NAME);
}
}
@@ -0,0 +1,275 @@
/**
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.pravega.connectors.flink.dynamic.table;

import io.pravega.client.stream.Stream;
import io.pravega.client.stream.StreamCut;
import io.pravega.connectors.flink.PravegaConfig;
import io.pravega.connectors.flink.PravegaWriterMode;
import io.pravega.connectors.flink.util.FlinkPravegaUtils;
import io.pravega.connectors.flink.util.StreamWithBoundaries;
import io.pravega.shared.NameUtils;
import org.apache.flink.annotation.Internal;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.table.api.TableException;
import org.apache.flink.table.api.ValidationException;

import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;

import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.CONTROLLER_URI;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SCAN_END_STREAMCUTS;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SCAN_EVENT_READ_TIMEOUT_INTERVAL;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SCAN_EXECUTION_TYPE;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SCAN_READER_GROUP_CHECKPOINT_INITIATE_TIMEOUT_INTERVAL;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SCAN_READER_GROUP_MAX_OUTSTANDING_CHECKPOINT_REQUEST;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SCAN_READER_GROUP_NAME;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SCAN_READER_GROUP_REFRESH_INTERVAL;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SCAN_START_STREAMCUTS;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SCAN_STREAMS;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SCAN_UID;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SCOPE;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SECURITY_AUTH_TOKEN;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SECURITY_AUTH_TYPE;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SECURITY_TRUST_STORE;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SECURITY_VALIDATE_HOSTNAME;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SINK_ENABLE_WATERMARK_PROPAGATION;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SINK_ROUTINGKEY_FIELD_NAME;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SINK_SEMANTIC;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SINK_STREAM;
import static io.pravega.connectors.flink.dynamic.table.PravegaOptions.SINK_TXN_LEASE_RENEWAL_INTERVAL;
import static io.pravega.connectors.flink.util.FlinkPravegaUtils.isCredentialsLoadDynamic;

/** Utilities for {@link PravegaOptions}. */
@Internal
public class PravegaOptionsUtil {

// --------------------------------------------------------------------------------------------
// Option enumerations
// --------------------------------------------------------------------------------------------

public static final String SCAN_EXECUTION_TYPE_VALUE_STREAMING = "streaming";
public static final String SCAN_EXECUTION_TYPE_VALUE_BATCH = "batch";

public static final String SINK_SEMANTIC_VALUE_EXACTLY_ONCE = "exactly-once";
public static final String SINK_SEMANTIC_VALUE_AT_LEAST_ONCE = "at-least-once";
public static final String SINK_SEMANTIC_VALUE_BEST_EFFORT = "best-effort";

private static final Set<String> SCAN_EXECUTION_TYPE_ENUMS = new HashSet<>(Arrays.asList(
SCAN_EXECUTION_TYPE_VALUE_STREAMING,
SCAN_EXECUTION_TYPE_VALUE_BATCH
));

private static final Set<String> SINK_SEMANTIC_ENUMS = new HashSet<>(Arrays.asList(
SINK_SEMANTIC_VALUE_AT_LEAST_ONCE,
SINK_SEMANTIC_VALUE_EXACTLY_ONCE,
SINK_SEMANTIC_VALUE_BEST_EFFORT
));

private PravegaOptionsUtil() {}

// --------------------------------------------------------------------------------------------
// Validation
// --------------------------------------------------------------------------------------------

public static void validateTableSourceOptions(ReadableConfig tableOptions) {
validateScanExecutionType(tableOptions);
validateSourceStreams(tableOptions);
if (tableOptions.get(SCAN_EXECUTION_TYPE).equals(SCAN_EXECUTION_TYPE_VALUE_STREAMING)) {
validateReaderGroupConfig(tableOptions);
}
}

public static void validateTableSinkOptions(ReadableConfig tableOptions) {
validateSinkStream(tableOptions);
validateSinkSemantic(tableOptions);
}

private static void validateScanExecutionType(ReadableConfig tableOptions) {
tableOptions.getOptional(SCAN_EXECUTION_TYPE).ifPresent(type -> {
if (!SCAN_EXECUTION_TYPE_ENUMS.contains(type)) {
throw new ValidationException(
String.format("Unsupported value '%s' for '%s'. Supported values are ['streaming', 'batch'].",
type, SCAN_EXECUTION_TYPE.key()));
}
});
}

private static void validateSourceStreams(ReadableConfig tableOptions) {
List<String> streams = tableOptions.getOptional(SCAN_STREAMS)
.orElseThrow(() -> new ValidationException(String.format("'%s' is required but missing", SCAN_STREAMS.key())));

streams.forEach(NameUtils::validateStreamName);

tableOptions.getOptional(SCAN_START_STREAMCUTS).ifPresent(streamCuts -> {
if (streamCuts.size() != streams.size()) {
throw new ValidationException(
String.format("Start stream cuts are not matching the number of streams, having %d, expected %d",
streamCuts.size(), streams.size()));
}
});

tableOptions.getOptional(SCAN_END_STREAMCUTS).ifPresent(streamCuts -> {
if (streamCuts.size() != streams.size()) {
throw new ValidationException(
String.format("End stream cuts are not matching the number of streams, having %d, expected %d",
streamCuts.size(), streams.size()));
}
});
}

private static void validateReaderGroupConfig(ReadableConfig tableOptions) {
tableOptions.getOptional(SCAN_READER_GROUP_MAX_OUTSTANDING_CHECKPOINT_REQUEST).ifPresent(num -> {
if (num < 1) {
throw new ValidationException(String.format("'%s' requires a positive integer, received %d",
SCAN_READER_GROUP_MAX_OUTSTANDING_CHECKPOINT_REQUEST.key(), num));
}
});
}

private static void validateSinkSemantic(ReadableConfig tableOptions) {
tableOptions.getOptional(SINK_SEMANTIC).ifPresent(semantic -> {
if (!SINK_SEMANTIC_ENUMS.contains(semantic)) {
throw new ValidationException(
String.format("Unsupported value '%s' for '%s'. Supported values are ['at-least-once', 'exactly-once', 'best-effort'].",
semantic, SINK_SEMANTIC.key()));
}
});
}

private static void validateSinkStream(ReadableConfig tableOptions) {
String stream = tableOptions.getOptional(SINK_STREAM)
.orElseThrow(() -> new ValidationException(String.format("'%s' is required but missing", SINK_STREAM.key())));
NameUtils.validateStreamName(stream);
}

// --------------------------------------------------------------------------------------------
// Utilities
// --------------------------------------------------------------------------------------------

// ------------------------------------- Common ----------------------------------------

public static PravegaConfig getPravegaConfig(ReadableConfig tableOptions) {
PravegaConfig pravegaConfig = PravegaConfig.fromDefaults()
.withControllerURI(URI.create(tableOptions.get(CONTROLLER_URI)))
.withDefaultScope(tableOptions.get(SCOPE))
.withHostnameValidation(tableOptions.get(SECURITY_VALIDATE_HOSTNAME))
.withTrustStore(tableOptions.get(SECURITY_TRUST_STORE));

Optional<String> authType = tableOptions.getOptional(SECURITY_AUTH_TYPE);
Optional<String> authToken = tableOptions.getOptional(SECURITY_AUTH_TOKEN);
if (authType.isPresent() && authToken.isPresent() && !isCredentialsLoadDynamic()) {
pravegaConfig.withCredentials(new FlinkPravegaUtils.SimpleCredentials(authType.get(), authToken.get()));
}

return pravegaConfig;
}

// ------------------------------------- Reader ----------------------------------------

public static boolean isStreamingReader(ReadableConfig tableOptions) {
return tableOptions.get(SCAN_EXECUTION_TYPE).equals(SCAN_EXECUTION_TYPE_VALUE_STREAMING);
}

public static String getReaderGroupName(ReadableConfig tableOptions) {
return tableOptions.get(SCAN_READER_GROUP_NAME);
}

public static String getUid(ReadableConfig tableOptions) {
return tableOptions.get(SCAN_UID);
}

public static long getReaderGroupRefreshTimeMillis(ReadableConfig tableOptions) {
return tableOptions.get(SCAN_READER_GROUP_REFRESH_INTERVAL).toMillis();
}

public static long getCheckpointInitiateTimeoutMillis(ReadableConfig tableOptions) {
return tableOptions.get(SCAN_READER_GROUP_CHECKPOINT_INITIATE_TIMEOUT_INTERVAL).toMillis();
}

public static long getEventReadTimeoutMillis(ReadableConfig tableOptions) {
return tableOptions.get(SCAN_EVENT_READ_TIMEOUT_INTERVAL).toMillis();
}

public static int getMaxOutstandingCheckpointRequest(ReadableConfig tableOptions) {
return tableOptions.get(SCAN_READER_GROUP_MAX_OUTSTANDING_CHECKPOINT_REQUEST);
}

public static boolean isBoundedRead(ReadableConfig tableOptions) {
Optional<List<String>> endStreamCuts = tableOptions.getOptional(SCAN_END_STREAMCUTS);
return endStreamCuts.isPresent() &&
endStreamCuts.get().stream().noneMatch(cut -> cut.equals(StreamCut.UNBOUNDED.asText()));
}

public static List<StreamWithBoundaries> resolveScanStreams(ReadableConfig tableOptions) {
String scope = tableOptions.get(SCOPE);
List<String> streams = tableOptions.getOptional(SCAN_STREAMS)
.orElseThrow(() -> new TableException("Validator should have checked that"));
List<String> startStreamCuts = tableOptions.get(SCAN_START_STREAMCUTS);
List<String> endStreamCuts = tableOptions.get(SCAN_END_STREAMCUTS);
List<StreamWithBoundaries> result = new ArrayList<>();

for (int i = 0; i < streams.size(); i++) {
Stream stream = Stream.of(scope, streams.get(i));
StreamCut from = startStreamCuts == null ? StreamCut.UNBOUNDED : StreamCut.from(startStreamCuts.get(i));
StreamCut to = endStreamCuts == null ? StreamCut.UNBOUNDED : StreamCut.from(endStreamCuts.get(i));
result.add(new StreamWithBoundaries(stream, from, to));
}

return result;
}

// ------------------------------------- Writer ----------------------------------------

public static Stream getSinkStream(ReadableConfig tableOptions) {
String scope = tableOptions.get(SCOPE);
String stream = tableOptions.get(SINK_STREAM);
return Stream.of(scope, stream);
}

public static PravegaWriterMode getWriterMode(ReadableConfig tableOptions) {
switch (tableOptions.get(SINK_SEMANTIC)) {
case SINK_SEMANTIC_VALUE_EXACTLY_ONCE:
return PravegaWriterMode.EXACTLY_ONCE;
case SINK_SEMANTIC_VALUE_AT_LEAST_ONCE:
return PravegaWriterMode.ATLEAST_ONCE;
case SINK_SEMANTIC_VALUE_BEST_EFFORT:
return PravegaWriterMode.BEST_EFFORT;
default:
throw new TableException("Validator should have checked that");
}
}

public static long getTransactionLeaseRenewalIntervalMillis(ReadableConfig tableOptions) {
return tableOptions.get(SINK_TXN_LEASE_RENEWAL_INTERVAL).toMillis();
}

public static boolean isWatermarkPropagationEnabled(ReadableConfig tableOptions) {
return tableOptions.get(SINK_ENABLE_WATERMARK_PROPAGATION);
}

public static String getRoutingKeyField(ReadableConfig tableOptions) {
return tableOptions.get(SINK_ROUTINGKEY_FIELD_NAME);
}

}
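A minimal sketch of driving these helpers programmatically (not part of this change), assuming the referenced PravegaOptions constants are plain string options; Configuration implements ReadableConfig, so it can feed the validation and derivation methods above.

import io.pravega.client.stream.Stream;
import io.pravega.connectors.flink.PravegaWriterMode;
import io.pravega.connectors.flink.dynamic.table.PravegaOptions;
import io.pravega.connectors.flink.dynamic.table.PravegaOptionsUtil;
import org.apache.flink.configuration.Configuration;

public final class PravegaOptionsUtilSketch {
    public static void main(String[] args) {
        Configuration options = new Configuration();
        options.set(PravegaOptions.CONTROLLER_URI, "tcp://localhost:9090");
        options.set(PravegaOptions.SCOPE, "examples");          // assumed to be a String option
        options.set(PravegaOptions.SINK_STREAM, "out-stream");  // assumed to be a String option
        options.set(PravegaOptions.SINK_SEMANTIC, PravegaOptionsUtil.SINK_SEMANTIC_VALUE_EXACTLY_ONCE);

        // Throws ValidationException if the stream name or sink semantic is invalid.
        PravegaOptionsUtil.validateTableSinkOptions(options);

        Stream sinkStream = PravegaOptionsUtil.getSinkStream(options);      // examples/out-stream
        PravegaWriterMode mode = PravegaOptionsUtil.getWriterMode(options); // EXACTLY_ONCE
        System.out.println(sinkStream.getScopedName() + " -> " + mode);
    }
}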
@@ -0,0 +1,163 @@
/**
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.pravega.connectors.flink.formats.registry;

import io.pravega.connectors.flink.PravegaConfig;
import io.pravega.schemaregistry.contract.data.SerializationFormat;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.formats.common.TimestampFormat;
import org.apache.flink.formats.json.JsonFormatOptions;
import org.apache.flink.formats.json.JsonFormatOptionsUtil;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.format.DecodingFormat;
import org.apache.flink.table.connector.format.EncodingFormat;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.factories.DeserializationFormatFactory;
import org.apache.flink.table.factories.DynamicTableFactory;
import org.apache.flink.table.factories.FactoryUtil;
import org.apache.flink.table.factories.SerializationFormatFactory;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.RowType;

import java.net.URI;
import java.util.HashSet;
import java.util.Set;

/**
* Table format factory for providing configured instances of Pravega-Registry to Flink RowData {@link
* SerializationSchema} and {@link DeserializationSchema}.
*/
public class PravegaRegistryFormatFactory implements DeserializationFormatFactory, SerializationFormatFactory {

public static final String IDENTIFIER = "pravega-registry";

@Override
public DecodingFormat<DeserializationSchema<RowData>> createDecodingFormat(
DynamicTableFactory.Context context, ReadableConfig formatOptions) {
FactoryUtil.validateFactoryOptions(this, formatOptions);

final String groupId = formatOptions.get(PravegaRegistryOptions.GROUP_ID);
final PravegaConfig pravegaConfig = PravegaRegistryOptionsUtil.getPravegaConfig(formatOptions)
.withDefaultScope(formatOptions.get(PravegaRegistryOptions.NAMESPACE))
.withSchemaRegistryURI(URI.create(formatOptions.get(PravegaRegistryOptions.URI)));

final boolean failOnMissingField = formatOptions.get(PravegaRegistryOptions.FAIL_ON_MISSING_FIELD);
final boolean ignoreParseErrors = formatOptions.get(PravegaRegistryOptions.IGNORE_PARSE_ERRORS);
TimestampFormat timestampOption = JsonFormatOptionsUtil.getTimestampFormat(formatOptions);

return new DecodingFormat<DeserializationSchema<RowData>>() {
@Override
public DeserializationSchema<RowData> createRuntimeDecoder(
DynamicTableSource.Context context, DataType producedDatatype) {
final RowType rowType = (RowType) producedDatatype.getLogicalType();
final TypeInformation<RowData> rowDataTypeInfo =
context.createTypeInformation(producedDatatype);
return new PravegaRegistryRowDataDeserializationSchema(
rowType,
rowDataTypeInfo,
groupId,
pravegaConfig,
failOnMissingField,
ignoreParseErrors,
timestampOption);
}

@Override
public ChangelogMode getChangelogMode() {
return ChangelogMode.insertOnly();
}
};
}

@Override
public EncodingFormat<SerializationSchema<RowData>> createEncodingFormat(
DynamicTableFactory.Context context, ReadableConfig formatOptions) {
FactoryUtil.validateFactoryOptions(this, formatOptions);

final String groupId = formatOptions.get(PravegaRegistryOptions.GROUP_ID);
final SerializationFormat serializationFormat = SerializationFormat.valueOf(
formatOptions.get(PravegaRegistryOptions.FORMAT));
final PravegaConfig pravegaConfig = PravegaRegistryOptionsUtil.getPravegaConfig(formatOptions)
.withDefaultScope(formatOptions.get(PravegaRegistryOptions.NAMESPACE))
.withSchemaRegistryURI(URI.create(formatOptions.get(PravegaRegistryOptions.URI)));

TimestampFormat timestampOption = JsonFormatOptionsUtil.getTimestampFormat(formatOptions);
final JsonFormatOptions.MapNullKeyMode mapNullKeyMode =
JsonFormatOptionsUtil.getMapNullKeyMode(formatOptions);
final String mapNullKeyLiteral = formatOptions.get(PravegaRegistryOptions.MAP_NULL_KEY_LITERAL);
final boolean encodeDecimalAsPlainNumber = formatOptions.get(PravegaRegistryOptions.ENCODE_DECIMAL_AS_PLAIN_NUMBER);

return new EncodingFormat<SerializationSchema<RowData>>() {
@Override
public SerializationSchema<RowData> createRuntimeEncoder(
DynamicTableSink.Context context, DataType consumedDataType) {
final RowType rowType = (RowType) consumedDataType.getLogicalType();
return new PravegaRegistryRowDataSerializationSchema(
rowType,
groupId,
serializationFormat,
pravegaConfig,
timestampOption,
mapNullKeyMode,
mapNullKeyLiteral,
encodeDecimalAsPlainNumber);
}

@Override
public ChangelogMode getChangelogMode() {
return ChangelogMode.insertOnly();
}
};
}

@Override
public String factoryIdentifier() {
return IDENTIFIER;
}

@Override
public Set<ConfigOption<?>> requiredOptions() {
Set<ConfigOption<?>> options = new HashSet<>();
options.add(PravegaRegistryOptions.URI);
options.add(PravegaRegistryOptions.NAMESPACE);
options.add(PravegaRegistryOptions.GROUP_ID);
options.add(PravegaRegistryOptions.FORMAT);
return options;
}

@Override
public Set<ConfigOption<?>> optionalOptions() {
Set<ConfigOption<?>> options = new HashSet<>();
options.add(PravegaRegistryOptions.FAIL_ON_MISSING_FIELD);
options.add(PravegaRegistryOptions.IGNORE_PARSE_ERRORS);
options.add(PravegaRegistryOptions.TIMESTAMP_FORMAT);
options.add(PravegaRegistryOptions.MAP_NULL_KEY_MODE);
options.add(PravegaRegistryOptions.MAP_NULL_KEY_LITERAL);
options.add(PravegaRegistryOptions.ENCODE_DECIMAL_AS_PLAIN_NUMBER);
options.add(PravegaRegistryOptions.SECURITY_AUTH_TYPE);
options.add(PravegaRegistryOptions.SECURITY_AUTH_TOKEN);
options.add(PravegaRegistryOptions.SECURITY_VALIDATE_HOSTNAME);
options.add(PravegaRegistryOptions.SECURITY_TRUST_STORE);
return options;
}
}
@@ -0,0 +1,66 @@
/**
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.pravega.connectors.flink.formats.registry;

import io.pravega.connectors.flink.dynamic.table.PravegaOptions;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigOptions;
import org.apache.flink.formats.json.JsonFormatOptions;

public class PravegaRegistryOptions {

public static final ConfigOption<String> URI = ConfigOptions
.key("uri")
.stringType()
.noDefaultValue()
.withDescription("Required URI of Pravega schema registry");

public static final ConfigOption<String> NAMESPACE = ConfigOptions
.key("namespace")
.stringType()
.noDefaultValue()
.withDescription("Required Pravega schema registry's namespace, should be the same as the Pravega scope name");

public static final ConfigOption<String> GROUP_ID = ConfigOptions
.key("group-id")
.stringType()
.noDefaultValue()
.withDescription("Required Pravega schema registry's groupID, should be the same as the Pravega stream name");

public static final ConfigOption<String> FORMAT = ConfigOptions
.key("format")
.stringType()
.defaultValue("Avro")
.withDescription("Optional serialization format for Pravega catalog. Valid enumerations are ['Avro'(default), 'Json']");

// Pravega security options
public static final ConfigOption<String> SECURITY_AUTH_TYPE = PravegaOptions.SECURITY_AUTH_TYPE;
public static final ConfigOption<String> SECURITY_AUTH_TOKEN = PravegaOptions.SECURITY_AUTH_TOKEN;
public static final ConfigOption<Boolean> SECURITY_VALIDATE_HOSTNAME = PravegaOptions.SECURITY_VALIDATE_HOSTNAME;
public static final ConfigOption<String> SECURITY_TRUST_STORE = PravegaOptions.SECURITY_TRUST_STORE;

// --------------------------------------------------------------------------------------------
// Json Options
// --------------------------------------------------------------------------------------------

public static final ConfigOption<Boolean> FAIL_ON_MISSING_FIELD = JsonFormatOptions.FAIL_ON_MISSING_FIELD;
public static final ConfigOption<Boolean> IGNORE_PARSE_ERRORS = JsonFormatOptions.IGNORE_PARSE_ERRORS;
public static final ConfigOption<String> TIMESTAMP_FORMAT = JsonFormatOptions.TIMESTAMP_FORMAT;
public static final ConfigOption<String> MAP_NULL_KEY_MODE = JsonFormatOptions.MAP_NULL_KEY_MODE;
public static final ConfigOption<String> MAP_NULL_KEY_LITERAL = JsonFormatOptions.MAP_NULL_KEY_LITERAL;
public static final ConfigOption<Boolean> ENCODE_DECIMAL_AS_PLAIN_NUMBER = JsonFormatOptions.ENCODE_DECIMAL_AS_PLAIN_NUMBER;
}
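For context, a hedged sketch of wiring this format into a table definition (not part of this change). The 'pravega-registry' identifier and the 'uri', 'namespace', 'group-id' and 'format' keys come from the factory and options above, and Flink namespaces format options by the format identifier in the WITH clause; the connector-side keys are illustrative assumptions.

import org.apache.flink.table.api.TableEnvironment;

public final class PravegaRegistryFormatSketch {
    // Hypothetical helper: registers a table whose payload is decoded via the schema registry format.
    public static void register(TableEnvironment tEnv) {
        tEnv.executeSql(
                "CREATE TABLE users (" +
                "  name STRING," +
                "  age INT" +
                ") WITH (" +
                "  'connector' = 'pravega'," +
                "  'controller-uri' = 'tcp://localhost:9090'," +
                "  'scope' = 'examples'," +
                "  'scan.streams' = 'users'," +
                "  'format' = 'pravega-registry'," +
                "  'pravega-registry.uri' = 'http://localhost:9092'," +
                "  'pravega-registry.namespace' = 'examples'," +
                "  'pravega-registry.group-id' = 'users'," +
                "  'pravega-registry.format' = 'Json'" +
                ")");
    }
}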
@@ -0,0 +1,47 @@
/**
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.pravega.connectors.flink.formats.registry;

import io.pravega.connectors.flink.PravegaConfig;
import io.pravega.connectors.flink.util.FlinkPravegaUtils;
import org.apache.flink.configuration.ReadableConfig;

import java.util.Optional;

import static io.pravega.connectors.flink.util.FlinkPravegaUtils.isCredentialsLoadDynamic;

public class PravegaRegistryOptionsUtil {

// --------------------------------------------------------------------------------------------
// Utilities
// --------------------------------------------------------------------------------------------

public static PravegaConfig getPravegaConfig(ReadableConfig tableOptions) {
PravegaConfig pravegaConfig = PravegaConfig.fromDefaults()
.withDefaultScope(tableOptions.get(PravegaRegistryOptions.NAMESPACE))
.withHostnameValidation(tableOptions.get(PravegaRegistryOptions.SECURITY_VALIDATE_HOSTNAME))
.withTrustStore(tableOptions.get(PravegaRegistryOptions.SECURITY_TRUST_STORE));

Optional<String> authType = tableOptions.getOptional(PravegaRegistryOptions.SECURITY_AUTH_TYPE);
Optional<String> authToken = tableOptions.getOptional(PravegaRegistryOptions.SECURITY_AUTH_TOKEN);
if (authType.isPresent() && authToken.isPresent() && !isCredentialsLoadDynamic()) {
pravegaConfig.withCredentials(new FlinkPravegaUtils.SimpleCredentials(authType.get(), authToken.get()));
}

return pravegaConfig;
}
}
@@ -0,0 +1,262 @@
/**
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.pravega.connectors.flink.formats.registry;

import io.pravega.client.stream.Serializer;
import io.pravega.connectors.flink.PravegaConfig;
import io.pravega.connectors.flink.util.SchemaRegistryUtils;
import io.pravega.schemaregistry.client.SchemaRegistryClient;
import io.pravega.schemaregistry.client.SchemaRegistryClientConfig;
import io.pravega.schemaregistry.client.SchemaRegistryClientFactory;
import io.pravega.schemaregistry.contract.data.SchemaInfo;
import io.pravega.schemaregistry.contract.data.SerializationFormat;
import io.pravega.schemaregistry.serializer.avro.schemas.AvroSchema;
import io.pravega.schemaregistry.serializer.shared.impl.AbstractDeserializer;
import io.pravega.schemaregistry.serializer.shared.impl.EncodingCache;
import io.pravega.schemaregistry.serializer.shared.impl.SerializerConfig;
import io.pravega.schemaregistry.serializers.SerializerFactory;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.formats.avro.AvroToRowDataConverters;
import org.apache.flink.formats.avro.typeutils.AvroSchemaConverter;
import org.apache.flink.formats.common.TimestampFormat;
import org.apache.flink.formats.json.JsonToRowDataConverters;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.DeserializationFeature;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.DecimalType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.logical.utils.LogicalTypeChecks;

import javax.annotation.Nullable;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Objects;

import static org.apache.flink.util.Preconditions.checkNotNull;
/**
* Deserialization schema from Pravega Schema Registry to Flink Table/SQL internal data structure {@link RowData}.
*
* <p>Deserializes a <code>byte[]</code> message with a deserializer managed by Pravega Schema Registry and reads the specified fields.
*
* <p>Failures during deserialization are forwarded as wrapped IOExceptions.
*/
public class PravegaRegistryRowDataDeserializationSchema implements DeserializationSchema<RowData> {
private static final long serialVersionUID = 1L;

/**
* Row type to generate the runtime converter.
*/
private final RowType rowType;

/**
* Type information describing the result type.
*/
private final TypeInformation<RowData> typeInfo;

/**
* Namespace describing the current scope.
*/
private final String namespace;

/**
* GroupId describing the current stream.
*/
private final String groupId;

/**
* Serialization format for schema registry.
*/
private SerializationFormat serializationFormat;

/**
* Pravega config for generating schema registry config.
*/
private final PravegaConfig pravegaConfig;

/**
* Deserializer to deserialize <code>byte[]</code> message.
*/
private transient Serializer deserializer;

// --------------------------------------------------------------------------------------------
// Json fields
// --------------------------------------------------------------------------------------------

/** Flag indicating whether to fail if a field is missing. */
private final boolean failOnMissingField;

/** Flag indicating whether to ignore invalid fields/rows (default: throw an exception). */
private final boolean ignoreParseErrors;

/** Timestamp format specification which is used to parse timestamp. */
private final TimestampFormat timestampFormat;

public PravegaRegistryRowDataDeserializationSchema(
RowType rowType,
TypeInformation<RowData> typeInfo,
String groupId,
PravegaConfig pravegaConfig,
boolean failOnMissingField,
boolean ignoreParseErrors,
TimestampFormat timestampFormat
) {
if (ignoreParseErrors && failOnMissingField) {
throw new IllegalArgumentException(
"JSON format doesn't support failOnMissingField and ignoreParseErrors are both enabled.");
}
this.rowType = rowType;
this.typeInfo = checkNotNull(typeInfo);
this.namespace = pravegaConfig.getDefaultScope();
this.groupId = groupId;
this.pravegaConfig = pravegaConfig;
this.failOnMissingField = failOnMissingField;
this.ignoreParseErrors = ignoreParseErrors;
this.timestampFormat = timestampFormat;
}

@SuppressWarnings("unchecked")
@Override
public void open(InitializationContext context) throws Exception {
SchemaRegistryClientConfig schemaRegistryClientConfig =
SchemaRegistryUtils.getSchemaRegistryClientConfig(pravegaConfig);
SchemaRegistryClient schemaRegistryClient = SchemaRegistryClientFactory.withNamespace(namespace,
schemaRegistryClientConfig);
SerializerConfig config = SerializerConfig.builder()
.registryConfig(schemaRegistryClientConfig)
.namespace(namespace)
.groupId(groupId)
.build();
serializationFormat = schemaRegistryClient.getGroupProperties(groupId).getSerializationFormat();

switch (serializationFormat) {
case Avro:
AvroSchema<Object> schema = AvroSchema.of(AvroSchemaConverter.convertToSchema(rowType));
deserializer = SerializerFactory.avroGenericDeserializer(config, schema);
break;
case Json:
ObjectMapper objectMapper = new ObjectMapper();
boolean hasDecimalType =
LogicalTypeChecks.hasNested(rowType, t -> t instanceof DecimalType);
if (hasDecimalType) {
objectMapper.enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS);
}
deserializer = new FlinkJsonGenericDeserializer(
groupId,
schemaRegistryClient,
config.getDecoders(),
new EncodingCache(groupId, schemaRegistryClient),
config.isWriteEncodingHeader(),
objectMapper);
break;
default:
throw new NotImplementedException("Not supporting deserialization format");
}
}

@Override
public RowData deserialize(@Nullable byte[] message) throws IOException {
if (message == null) {
return null;
}
try {
return convertToRowData(deserializeToObject(message));
} catch (Exception e) {
if (ignoreParseErrors) {
return null;
}
throw new IOException("Failed to deserialize byte array.", e);
}
}

public Object deserializeToObject(byte[] message) {
return deserializer.deserialize(ByteBuffer.wrap(message));
}

public RowData convertToRowData(Object message) {
Object o;
switch (serializationFormat) {
case Avro:
AvroToRowDataConverters.AvroToRowDataConverter avroConverter =
AvroToRowDataConverters.createRowConverter(rowType);
o = avroConverter.convert(message);
break;
case Json:
JsonToRowDataConverters.JsonToRowDataConverter jsonConverter =
new JsonToRowDataConverters(failOnMissingField, ignoreParseErrors, timestampFormat)
.createConverter(checkNotNull(rowType));
o = jsonConverter.convert((JsonNode) message);
break;
default:
throw new NotImplementedException("Not supporting deserialization format");
}
return (RowData) o;
}

private static class FlinkJsonGenericDeserializer extends AbstractDeserializer<JsonNode> {
private final ObjectMapper objectMapper;

public FlinkJsonGenericDeserializer(String groupId, SchemaRegistryClient client,
SerializerConfig.Decoders decoders, EncodingCache encodingCache,
boolean encodeHeader, ObjectMapper objectMapper) {
super(groupId, client, null, false, decoders, encodingCache, encodeHeader);
this.objectMapper = objectMapper;
}

@Override
public final JsonNode deserialize(InputStream inputStream,
SchemaInfo writerSchemaInfo,
SchemaInfo readerSchemaInfo) throws IOException {
return objectMapper.readTree(inputStream);
}
}

@Override
public boolean isEndOfStream(RowData nextElement) {
return false;
}

@Override
public TypeInformation<RowData> getProducedType() {
return typeInfo;
}

@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
PravegaRegistryRowDataDeserializationSchema that = (PravegaRegistryRowDataDeserializationSchema) o;
return failOnMissingField == that.failOnMissingField && ignoreParseErrors == that.ignoreParseErrors &&
Objects.equals(rowType, that.rowType) && Objects.equals(typeInfo, that.typeInfo) &&
Objects.equals(namespace, that.namespace) && Objects.equals(groupId, that.groupId) &&
serializationFormat == that.serializationFormat &&
timestampFormat == that.timestampFormat;
}

@Override
public int hashCode() {
return Objects.hash(rowType, typeInfo, namespace, groupId, serializationFormat, pravegaConfig,
failOnMissingField, ignoreParseErrors, timestampFormat);
}
}
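Outside the planner, the deserialization schema can also be constructed directly. A minimal sketch under stated assumptions: the two-field row layout, the endpoints, and the use of InternalTypeInfo as a stand-in for planner-provided type information are illustrative only, and the runtime still has to call open() before deserialize().

import io.pravega.connectors.flink.PravegaConfig;
import io.pravega.connectors.flink.formats.registry.PravegaRegistryRowDataDeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.formats.common.TimestampFormat;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.runtime.typeutils.InternalTypeInfo;
import org.apache.flink.table.types.logical.IntType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.logical.VarCharType;

import java.net.URI;

public final class RegistryDeserializationSketch {
    public static PravegaRegistryRowDataDeserializationSchema build() {
        // Assumed row layout of the registry-managed events: (name STRING, age INT).
        RowType rowType = RowType.of(new VarCharType(VarCharType.MAX_LENGTH), new IntType());
        TypeInformation<RowData> typeInfo = InternalTypeInfo.of(rowType);

        // The default scope doubles as the schema registry namespace, as in the format factory above.
        PravegaConfig pravegaConfig = PravegaConfig.fromDefaults()
                .withDefaultScope("examples")
                .withSchemaRegistryURI(URI.create("http://localhost:9092"));

        return new PravegaRegistryRowDataDeserializationSchema(
                rowType, typeInfo, "users", pravegaConfig,
                false /* failOnMissingField */, false /* ignoreParseErrors */, TimestampFormat.ISO_8601);
    }
}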
@@ -0,0 +1,245 @@
/**
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.pravega.connectors.flink.formats.registry;

import io.pravega.client.stream.Serializer;
import io.pravega.connectors.flink.PravegaConfig;
import io.pravega.connectors.flink.table.catalog.pravega.util.PravegaSchemaUtils;
import io.pravega.connectors.flink.util.SchemaRegistryUtils;
import io.pravega.schemaregistry.client.SchemaRegistryClient;
import io.pravega.schemaregistry.client.SchemaRegistryClientConfig;
import io.pravega.schemaregistry.client.SchemaRegistryClientFactory;
import io.pravega.schemaregistry.contract.data.SchemaInfo;
import io.pravega.schemaregistry.contract.data.SerializationFormat;
import io.pravega.schemaregistry.serializer.avro.schemas.AvroSchema;
import io.pravega.schemaregistry.serializer.json.schemas.JSONSchema;
import io.pravega.schemaregistry.serializer.shared.codec.Encoder;
import io.pravega.schemaregistry.serializer.shared.impl.AbstractSerializer;
import io.pravega.schemaregistry.serializer.shared.impl.SerializerConfig;
import io.pravega.schemaregistry.serializers.SerializerFactory;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.formats.avro.RowDataToAvroConverters;
import org.apache.flink.formats.avro.typeutils.AvroSchemaConverter;
import org.apache.flink.formats.common.TimestampFormat;
import org.apache.flink.formats.json.JsonFormatOptions;
import org.apache.flink.formats.json.RowDataToJsonConverters;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonGenerator;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.RowType;

import java.io.IOException;
import java.io.OutputStream;
import java.util.Objects;

/**
* Serialization schema that serializes an object of Flink internal data structure {@link RowData} into
* Pravega Schema Registry bytes.
*
* <p>Serializes the input Flink object into an Avro GenericRecord or a Json JsonNode and converts it into <code>byte[]</code>.
*
* <p>Result <code>byte[]</code> messages can be deserialized using {@link
* PravegaRegistryRowDataDeserializationSchema}.
*/
public class PravegaRegistryRowDataSerializationSchema implements SerializationSchema<RowData> {
private static final long serialVersionUID = 1L;

/** RowType to generate the runtime converter. */
private final RowType rowType;

/** Serializer to serialize to <code>byte[]</code>. */
private transient Serializer serializer;

/**
* Namespace describing the current scope.
*/
private final String namespace;

/**
* GroupId describing the current stream.
*/
private final String groupId;

/**
* Serialization format for schema registry.
*/
private SerializationFormat serializationFormat;

/**
* Pravega config for generating schema registry config.
*/
private final PravegaConfig pravegaConfig;

// --------------------------------------------------------------------------------------------
// Avro fields
// --------------------------------------------------------------------------------------------

/** Avro serialization schema. */
private transient Schema avroSchema;

// --------------------------------------------------------------------------------------------
// Json fields
// --------------------------------------------------------------------------------------------

/** Timestamp format specification which is used to serialize timestamps. */
private final TimestampFormat timestampFormat;

/** The handling mode when serializing null keys for map data. */
private final JsonFormatOptions.MapNullKeyMode mapNullKeyMode;

/** The string literal used for map null keys when the handling mode is LITERAL. */
private final String mapNullKeyLiteral;

/** Flag indicating whether to serialize all decimals as plain numbers. */
private final boolean encodeDecimalAsPlainNumber;

public PravegaRegistryRowDataSerializationSchema(
RowType rowType,
String groupId,
SerializationFormat serializationFormat,
PravegaConfig pravegaConfig,
TimestampFormat timestampOption,
JsonFormatOptions.MapNullKeyMode mapNullKeyMode,
String mapNullKeyLiteral,
boolean encodeDecimalAsPlainNumber) {
this.rowType = rowType;
this.serializer = null;
this.namespace = pravegaConfig.getDefaultScope();
this.groupId = groupId;
this.serializationFormat = serializationFormat;
this.pravegaConfig = pravegaConfig;
this.timestampFormat = timestampOption;
this.mapNullKeyMode = mapNullKeyMode;
this.mapNullKeyLiteral = mapNullKeyLiteral;
this.encodeDecimalAsPlainNumber = encodeDecimalAsPlainNumber;
}

@SuppressWarnings("unchecked")
@Override
public void open(InitializationContext context) throws Exception {
SchemaRegistryClientConfig schemaRegistryClientConfig =
SchemaRegistryUtils.getSchemaRegistryClientConfig(pravegaConfig);
SchemaRegistryClient schemaRegistryClient = SchemaRegistryClientFactory.withNamespace(namespace,
schemaRegistryClientConfig);
SerializerConfig config = SerializerConfig.builder()
.registryConfig(schemaRegistryClientConfig)
.namespace(namespace)
.groupId(groupId)
.build();

switch (serializationFormat) {
case Avro:
avroSchema = AvroSchemaConverter.convertToSchema(rowType);
serializer = SerializerFactory.avroSerializer(config, AvroSchema.ofRecord(avroSchema));
break;
case Json:
String jsonSchemaString = PravegaSchemaUtils.convertToJsonSchemaString(rowType);
serializer = new FlinkJsonSerializer(
groupId,
schemaRegistryClient,
JSONSchema.of("", jsonSchemaString, JsonNode.class),
config.getEncoder(),
config.isRegisterSchema(),
config.isWriteEncodingHeader());
break;
default:
throw new NotImplementedException("Not supporting serialization format");
}
}

@SuppressWarnings("unchecked")
@Override
public byte[] serialize(RowData row) {
try {
switch (serializationFormat) {
case Avro:
return convertToByteArray(serializeToGenericRecord(row));
case Json:
return convertToByteArray(serializeToJsonNode(row));
default:
throw new NotImplementedException("Not supporting serialization format");
}
} catch (Exception e) {
throw new RuntimeException("Failed to serialize row.", e);
}
}

public GenericRecord serializeToGenericRecord(RowData row) {
RowDataToAvroConverters.RowDataToAvroConverter runtimeConverter =
RowDataToAvroConverters.createConverter(rowType);
return (GenericRecord) runtimeConverter.convert(avroSchema, row);
}

public JsonNode serializeToJsonNode(RowData row) {
RowDataToJsonConverters.RowDataToJsonConverter runtimeConverter = new RowDataToJsonConverters(
timestampFormat, mapNullKeyMode, mapNullKeyLiteral)
.createConverter(rowType);
ObjectMapper mapper = new ObjectMapper().configure(
JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN, encodeDecimalAsPlainNumber);
ObjectNode node = mapper.createObjectNode();
return runtimeConverter.convert(mapper, node, row);
}

@SuppressWarnings("unchecked")
public byte[] convertToByteArray(Object message) {
return serializer.serialize(message).array();
}

@VisibleForTesting
protected static class FlinkJsonSerializer extends AbstractSerializer<JsonNode> {
private final ObjectMapper objectMapper;
public FlinkJsonSerializer(String groupId, SchemaRegistryClient client, JSONSchema schema,
Encoder encoder, boolean registerSchema, boolean encodeHeader) {
super(groupId, client, schema, encoder, registerSchema, encodeHeader);
objectMapper = new ObjectMapper();
}

@Override
protected void serialize(JsonNode jsonNode, SchemaInfo schemaInfo, OutputStream outputStream) throws IOException {
objectMapper.writeValue(outputStream, jsonNode);
outputStream.flush();
}
}

@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
PravegaRegistryRowDataSerializationSchema that = (PravegaRegistryRowDataSerializationSchema) o;
return Objects.equals(rowType, that.rowType) && Objects.equals(namespace, that.namespace) &&
Objects.equals(groupId, that.groupId) &&
serializationFormat == that.serializationFormat && timestampFormat == that.timestampFormat &&
mapNullKeyMode == that.mapNullKeyMode && Objects.equals(mapNullKeyLiteral, that.mapNullKeyLiteral)
&& encodeDecimalAsPlainNumber == that.encodeDecimalAsPlainNumber;
}

@Override
public int hashCode() {
return Objects.hash(rowType, namespace, groupId, pravegaConfig, serializationFormat,
timestampFormat, mapNullKeyMode, mapNullKeyLiteral, encodeDecimalAsPlainNumber);
}
}
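A minimal construction sketch, not part of this change, showing how the constructor above might be wired up; the scope, group id, and registry URI are placeholders, and the three trailing JSON-only arguments are defaults because the Avro branch ignores them.

// additional imports assumed: java.net.URI,
// org.apache.flink.table.types.logical.{LogicalType, IntType, VarCharType}
RowType rowType = RowType.of(
        new LogicalType[]{new IntType(), new VarCharType(VarCharType.MAX_LENGTH)},
        new String[]{"id", "name"});

PravegaConfig pravegaConfig = PravegaConfig.fromDefaults()
        .withDefaultScope("examples")                                 // also used as the registry namespace
        .withSchemaRegistryURI(URI.create("http://localhost:9092")); // placeholder registry endpoint

SerializationSchema<RowData> schema = new PravegaRegistryRowDataSerializationSchema(
        rowType,
        "examples-stream",                       // schema-registry group id (placeholder)
        SerializationFormat.Avro,
        pravegaConfig,
        TimestampFormat.ISO_8601,
        JsonFormatOptions.MapNullKeyMode.FAIL,   // JSON-only option, ignored for Avro
        "null",                                  // JSON-only map-null-key literal
        false);                                  // JSON-only decimal-as-plain-number flag
// Flink calls schema.open(context) on the task before the first serialize(row).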
@@ -1,11 +1,17 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.pravega.connectors.flink.serialization;
@@ -22,18 +28,19 @@
import io.pravega.schemaregistry.serializer.json.schemas.JSONSchema;
import io.pravega.schemaregistry.serializer.shared.impl.SerializerConfig;
import io.pravega.schemaregistry.serializers.SerializerFactory;
import lombok.extern.slf4j.Slf4j;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.avro.specific.SpecificRecordBase;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.flink.util.FlinkRuntimeException;
import org.apache.flink.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Serializable;
import java.nio.ByteBuffer;

@Slf4j
public class DeserializerFromSchemaRegistry<T> implements Serializer<T>, Serializable {
private static final Logger LOG = LoggerFactory.getLogger(DeserializerFromSchemaRegistry.class);

private static final long serialVersionUID = 1L;

@@ -60,10 +67,9 @@ private void initialize() {

try (SchemaRegistryClient schemaRegistryClient = SchemaRegistryClientFactory.withNamespace(
pravegaConfig.getDefaultScope(), schemaRegistryClientConfig)) {
format = schemaRegistryClient.getLatestSchemaVersion(group, null)
.getSchemaInfo().getSerializationFormat();
format = schemaRegistryClient.getGroupProperties(group).getSerializationFormat();
} catch (Exception e) {
log.error("Error while closing the schema registry client", e);
LOG.error("Error while closing the schema registry client", e);
throw new FlinkRuntimeException(e);
}

@@ -80,10 +86,10 @@ private void initialize() {
break;
case Avro:
Preconditions.checkArgument(IndexedRecord.class.isAssignableFrom(tClass));
if (GenericRecord.class.isAssignableFrom(tClass)) {
serializer = (Serializer<T>) SerializerFactory.avroGenericDeserializer(serializerConfig, null);
} else {
if (SpecificRecordBase.class.isAssignableFrom(tClass)) {
serializer = SerializerFactory.avroDeserializer(serializerConfig, AvroSchema.of(tClass));
} else {
serializer = (Serializer<T>) SerializerFactory.avroGenericDeserializer(serializerConfig, null);
}
break;
case Protobuf:
@@ -1,11 +1,17 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink.serialization;

@@ -21,7 +27,7 @@ public class JsonSerializer<T> implements Serializer<T>, Serializable {

/** Object mapper for parsing the JSON. */
private final ObjectMapper objectMapper = new ObjectMapper();
private Class<T> valueType;
private final Class<T> valueType;

public JsonSerializer(Class<T> valueType) {
this.valueType = valueType;
@@ -1,26 +1,30 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink.serialization;

import io.pravega.client.stream.EventRead;
import io.pravega.client.stream.Serializer;
import org.apache.flink.api.common.functions.InvalidTypesException;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;

import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;

import org.apache.flink.api.common.functions.InvalidTypesException;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.serialization.DeserializationSchema;

import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;

@@ -31,8 +35,7 @@
* exposes the produced type (TypeInformation) to allow Flink to configure its internal
* serialization and persistence stack.
*
* <p>An additional method {@link #extractEvent(EventRead)} is provided for
* applying the metadata in the deserialization. This method can be overridden in the extended class. </p>
* <p>To deserialize metadata, use {@link PravegaDeserializationSchemaWithMetadata} instead.
*/
public class PravegaDeserializationSchema<T>
implements DeserializationSchema<T>, WrappingSerializer<T> {
@@ -130,18 +133,6 @@ public Serializer<T> getWrappedSerializer() {
return serializer;
}

/**
* A method for applying the metadata in deserialization.
* Override it in the custom extended {@link PravegaDeserializationSchema} if the Pravega metadata is needed.
*
* @param eventRead The EventRead structure the client returns which contains metadata
*
* @return the deserialized event with metadata
*/
public T extractEvent(EventRead<T> eventRead) {
return eventRead.getEvent();
}

// ------------------------------------------------------------------------

private static void checkSerializer(Serializer<?> serializer) {
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/**
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink.serialization;

import io.pravega.client.stream.EventRead;
import io.pravega.connectors.flink.FlinkPravegaReader;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.util.Collector;

import java.io.IOException;
import java.nio.ByteBuffer;

/**
* A Pravega DeserializationSchema that enables deserializing events together with
* the Pravega {@link EventRead} metadata; this can be used for recording and indexing use cases. <p>
*
* This deserialization schema disables the
* {@link PravegaDeserializationSchemaWithMetadata#deserialize(byte[])} method and
* delegates the deserialization to
* {@link PravegaDeserializationSchemaWithMetadata#deserialize(byte[], EventRead)}.
* {@link FlinkPravegaReader} will distinguish this from a normal deserialization schema and
* call {@link PravegaDeserializationSchemaWithMetadata#deserialize(byte[], EventRead)} when it is reading events.
*/
public abstract class PravegaDeserializationSchemaWithMetadata<T> implements DeserializationSchema<T> {
public abstract T deserialize(byte[] message, EventRead<ByteBuffer> eventRead) throws IOException;

public void deserialize(byte[] message, EventRead<ByteBuffer> eventRead, Collector<T> out) throws IOException {
T deserialize = deserialize(message, eventRead);
if (deserialize != null) {
out.collect(deserialize);
}
}

public T deserialize(byte[] message) throws IOException {
throw new IllegalStateException("Should never be called.");
}
}
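A hedged sketch, not part of this change, of a concrete subclass: it pairs every UTF-8 payload with the EventPointer taken from the EventRead metadata. The EventWithPointer holder type is purely illustrative.

// additional imports assumed: java.nio.charset.StandardCharsets,
// org.apache.flink.api.common.typeinfo.TypeInformation
public class PointerTrackingDeserializationSchema
        extends PravegaDeserializationSchemaWithMetadata<PointerTrackingDeserializationSchema.EventWithPointer> {

    /** Illustrative Flink POJO holding an event payload and its pointer. */
    public static class EventWithPointer {
        public String payload;
        public String pointer;

        public EventWithPointer() { }

        public EventWithPointer(String payload, String pointer) {
            this.payload = payload;
            this.pointer = pointer;
        }
    }

    @Override
    public EventWithPointer deserialize(byte[] message, EventRead<ByteBuffer> eventRead) throws IOException {
        // keep the payload together with where it came from, e.g. for later indexing or re-reads
        return new EventWithPointer(
                new String(message, StandardCharsets.UTF_8),
                eventRead.getEventPointer().toString());
    }

    @Override
    public boolean isEndOfStream(EventWithPointer nextElement) {
        return false;
    }

    @Override
    public TypeInformation<EventWithPointer> getProducedType() {
        return TypeInformation.of(EventWithPointer.class);
    }
}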
@@ -1,18 +1,27 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink.serialization;

import io.pravega.client.stream.Serializer;
import java.nio.ByteBuffer;
import org.apache.flink.api.common.serialization.SerializationSchema;

import java.nio.ByteBuffer;

import static io.pravega.connectors.flink.util.FlinkPravegaUtils.byteBufferToArray;

/**
* A serialization schema adapter for a Pravega serializer.
*/
@@ -29,14 +38,7 @@ public PravegaSerializationSchema(Serializer<T> serializer) {
@Override
public byte[] serialize(T element) {
ByteBuffer buf = serializer.serialize(element);

if (buf.hasArray() && buf.arrayOffset() == 0 && buf.position() == 0 && buf.limit() == buf.capacity()) {
return buf.array();
} else {
byte[] bytes = new byte[buf.remaining()];
buf.get(bytes);
return bytes;
}
return byteBufferToArray(buf);
}

@Override
@@ -1,11 +1,17 @@
/**
* Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.pravega.connectors.flink.serialization;

Large diffs are not rendered by default.

@@ -0,0 +1,140 @@
/**
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.pravega.connectors.flink.table.catalog.pravega.factories;

import io.pravega.connectors.flink.PravegaConfig;
import io.pravega.connectors.flink.dynamic.table.FlinkPravegaDynamicTableFactory;
import io.pravega.connectors.flink.dynamic.table.PravegaOptions;
import io.pravega.connectors.flink.dynamic.table.PravegaOptionsUtil;
import io.pravega.connectors.flink.formats.registry.PravegaRegistryFormatFactory;
import io.pravega.connectors.flink.formats.registry.PravegaRegistryOptions;
import io.pravega.connectors.flink.table.catalog.pravega.PravegaCatalog;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.DelegatingConfiguration;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.table.catalog.Catalog;
import org.apache.flink.table.factories.CatalogFactory;
import org.apache.flink.table.factories.FactoryUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.net.URI;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/** Factory for {@link PravegaCatalog}. */
public class PravegaCatalogFactory implements CatalogFactory {
// the prefix of json related options
private static final String JSON_PREFIX = "json.";
// the prefix of Pravega security options
private static final String PRAVEGA_SECURITY_PREFIX = "security.";

private static final Logger LOG = LoggerFactory.getLogger(PravegaCatalogFactory.class);

@Override
public String factoryIdentifier() {
return PravegaCatalogFactoryOptions.IDENTIFIER;
}

@Override
public Set<ConfigOption<?>> requiredOptions() {
final Set<ConfigOption<?>> options = new HashSet<>();
options.add(PravegaCatalogFactoryOptions.DEFAULT_DATABASE);
options.add(PravegaCatalogFactoryOptions.CONTROLLER_URI);
options.add(PravegaCatalogFactoryOptions.SCHEMA_REGISTRY_URI);
return options;
}

@Override
public Set<ConfigOption<?>> optionalOptions() {
final Set<ConfigOption<?>> options = new HashSet<>();
options.add(PravegaCatalogFactoryOptions.SECURITY_AUTH_TYPE);
options.add(PravegaCatalogFactoryOptions.SECURITY_AUTH_TOKEN);
options.add(PravegaCatalogFactoryOptions.SECURITY_VALIDATE_HOSTNAME);
options.add(PravegaCatalogFactoryOptions.SECURITY_TRUST_STORE);
options.add(PravegaCatalogFactoryOptions.SERIALIZATION_FORMAT);
options.add(PravegaCatalogFactoryOptions.JSON_FAIL_ON_MISSING_FIELD);
options.add(PravegaCatalogFactoryOptions.JSON_IGNORE_PARSE_ERRORS);
options.add(PravegaCatalogFactoryOptions.JSON_TIMESTAMP_FORMAT);
options.add(PravegaCatalogFactoryOptions.JSON_MAP_NULL_KEY_MODE);
options.add(PravegaCatalogFactoryOptions.JSON_MAP_NULL_KEY_LITERAL);
options.add(PravegaCatalogFactoryOptions.JSON_ENCODE_DECIMAL_AS_PLAIN_NUMBER);
return options;
}

@Override
public Catalog createCatalog(Context context) {
final FactoryUtil.CatalogFactoryHelper helper =
FactoryUtil.createCatalogFactoryHelper(this, context);
// skip validating the options with the 'json.' prefix since the ConfigOptions in
// PravegaCatalogFactoryOptions are declared without that prefix;
// these options are validated later in PravegaRegistryFormatFactory
helper.validateExcept(JSON_PREFIX);

// all catalog options
ReadableConfig configOptions = helper.getOptions();

Map<String, String> properties = getCatalogOptions((Configuration) configOptions);
PravegaConfig pravegaConfig = PravegaOptionsUtil.getPravegaConfig(configOptions)
.withDefaultScope(configOptions.get(PravegaCatalogFactoryOptions.DEFAULT_DATABASE))
.withSchemaRegistryURI(URI.create(configOptions.get(PravegaCatalogFactoryOptions.SCHEMA_REGISTRY_URI)));
return new PravegaCatalog(
context.getName(),
configOptions.get(PravegaCatalogFactoryOptions.DEFAULT_DATABASE),
properties,
pravegaConfig,
configOptions.get(PravegaCatalogFactoryOptions.SERIALIZATION_FORMAT));
}

private Map<String, String> getCatalogOptions(Configuration configOptions) {
Map<String, String> properties = new HashMap<>();

// table options
properties.put(FactoryUtil.CONNECTOR.key(), FlinkPravegaDynamicTableFactory.IDENTIFIER);
properties.put(PravegaOptions.CONTROLLER_URI.key(), configOptions.get(PravegaCatalogFactoryOptions.CONTROLLER_URI));
properties.put(FactoryUtil.FORMAT.key(), PravegaRegistryFormatFactory.IDENTIFIER);

// Pravega registry options
properties.put(String.format("%s.%s",
PravegaRegistryFormatFactory.IDENTIFIER, PravegaRegistryOptions.URI.key()),
configOptions.get(PravegaCatalogFactoryOptions.SCHEMA_REGISTRY_URI));
properties.put(String.format("%s.%s",
PravegaRegistryFormatFactory.IDENTIFIER, PravegaRegistryOptions.FORMAT.key()),
configOptions.get(PravegaCatalogFactoryOptions.SERIALIZATION_FORMAT));

// copy security options to both table options and Pravega registry options
for (Map.Entry<String, String> entry : configOptions.toMap().entrySet()) {
if (entry.getKey().startsWith(PRAVEGA_SECURITY_PREFIX)) {
properties.put(entry.getKey(), entry.getValue());
properties.put(String.format("%s.%s", PravegaRegistryFormatFactory.IDENTIFIER, entry.getKey()),
entry.getValue());
}
}

// a view of the configuration containing only the 'json.'-prefixed options, with the prefix stripped
DelegatingConfiguration delegatingConfiguration = new DelegatingConfiguration(configOptions, JSON_PREFIX);

// put json related options into properties
Map<String, String> jsonProperties = delegatingConfiguration.toMap();
jsonProperties.forEach((key, value) ->
properties.put(String.format("%s.%s", PravegaRegistryFormatFactory.IDENTIFIER, key), value));
return properties;
}
}
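To illustrate how these options are consumed, a hedged Flink SQL sketch of registering the catalog; all URIs and names are placeholders, the option keys mirror PravegaCatalogFactoryOptions (next file in this diff), and the 'json.*' entry is one of the keys forwarded to PravegaRegistryFormatFactory as described above.

// additional imports assumed: org.apache.flink.table.api.{TableEnvironment, EnvironmentSettings}
TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
tEnv.executeSql(
        "CREATE CATALOG pravega_catalog WITH (" +
        "  'type' = 'pravega'," +
        "  'default-database' = 'examples'," +
        "  'controller-uri' = 'tcp://localhost:9090'," +
        "  'schema-registry-uri' = 'http://localhost:9092'," +
        "  'serialization.format' = 'Json'," +
        "  'json.ignore-parse-errors' = 'true'" +
        ")");
tEnv.useCatalog("pravega_catalog");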
@@ -0,0 +1,59 @@
/**
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.pravega.connectors.flink.table.catalog.pravega.factories;

import io.pravega.connectors.flink.dynamic.table.PravegaOptions;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigOptions;
import org.apache.flink.formats.json.JsonFormatOptions;
import org.apache.flink.table.catalog.CommonCatalogOptions;

/** {@link ConfigOption}s for {@link PravegaCatalogFactory}. */
public class PravegaCatalogFactoryOptions {

public static final String IDENTIFIER = "pravega";

public static final ConfigOption<String> DEFAULT_DATABASE =
ConfigOptions.key(CommonCatalogOptions.DEFAULT_DATABASE_KEY).stringType().noDefaultValue()
.withDescription("Required default database");

// required Pravega controller URI
public static final ConfigOption<String> CONTROLLER_URI = PravegaOptions.CONTROLLER_URI;

public static final ConfigOption<String> SCHEMA_REGISTRY_URI =
ConfigOptions.key("schema-registry-uri").stringType().noDefaultValue().withDescription("Required Schema Registry URI");

public static final ConfigOption<String> SERIALIZATION_FORMAT =
ConfigOptions.key("serialization.format").stringType().defaultValue("Avro")
.withDescription("Optional serialization format for Pravega catalog. Valid enumerations are ['Avro'(default), 'Json']");

// Pravega security options
public static final ConfigOption<String> SECURITY_AUTH_TYPE = PravegaOptions.SECURITY_AUTH_TYPE;
public static final ConfigOption<String> SECURITY_AUTH_TOKEN = PravegaOptions.SECURITY_AUTH_TOKEN;
public static final ConfigOption<Boolean> SECURITY_VALIDATE_HOSTNAME = PravegaOptions.SECURITY_VALIDATE_HOSTNAME;
public static final ConfigOption<String> SECURITY_TRUST_STORE = PravegaOptions.SECURITY_TRUST_STORE;

// Json related options
public static final ConfigOption<Boolean> JSON_FAIL_ON_MISSING_FIELD = JsonFormatOptions.FAIL_ON_MISSING_FIELD;
public static final ConfigOption<Boolean> JSON_IGNORE_PARSE_ERRORS = JsonFormatOptions.IGNORE_PARSE_ERRORS;
public static final ConfigOption<String> JSON_TIMESTAMP_FORMAT = JsonFormatOptions.TIMESTAMP_FORMAT;
public static final ConfigOption<String> JSON_MAP_NULL_KEY_MODE = JsonFormatOptions.MAP_NULL_KEY_MODE;
public static final ConfigOption<String> JSON_MAP_NULL_KEY_LITERAL = JsonFormatOptions.MAP_NULL_KEY_LITERAL;
public static final ConfigOption<Boolean> JSON_ENCODE_DECIMAL_AS_PLAIN_NUMBER = JsonFormatOptions.ENCODE_DECIMAL_AS_PLAIN_NUMBER;

private PravegaCatalogFactoryOptions() {}
}
@@ -0,0 +1,163 @@
/**
* Copyright Pravega Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.pravega.connectors.flink.table.catalog.pravega.util;

import io.pravega.schemaregistry.contract.data.SchemaInfo;
import io.pravega.schemaregistry.contract.data.SerializationFormat;
import io.pravega.schemaregistry.serializer.avro.schemas.AvroSchema;
import io.pravega.schemaregistry.serializer.json.schemas.JSONSchema;
import org.apache.avro.Schema;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.flink.annotation.Internal;
import org.apache.flink.formats.avro.typeutils.AvroSchemaConverter;
import org.apache.flink.formats.json.JsonRowSchemaConverter;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.ResolvedSchema;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.utils.DataTypeUtils;
import org.apache.flink.table.types.utils.TypeConversions;

import java.util.List;

@Internal
public class PravegaSchemaUtils {

private PravegaSchemaUtils() {
// private
}

public static ResolvedSchema schemaInfoToResolvedSchema(SchemaInfo schemaInfo) {

SerializationFormat format = schemaInfo.getSerializationFormat();
String schemaString;
DataType dataType;

switch (format) {
case Json:
JSONSchema jsonSchema = JSONSchema.from(schemaInfo);
schemaString = jsonSchema.getSchemaString();
dataType = TypeConversions.fromLegacyInfoToDataType(JsonRowSchemaConverter.convert(schemaString));
break;
case Avro:
AvroSchema avroSchema = AvroSchema.from(schemaInfo);

schemaString = avroSchema.getSchema().toString();
dataType = TypeConversions.fromLegacyInfoToDataType(AvroSchemaConverter.convertToTypeInfo(schemaString));
break;

default:
throw new NotImplementedException("Not supporting serialization format");
}

return DataTypeUtils.expandCompositeTypeToSchema(dataType);
}

public static SchemaInfo tableSchemaToSchemaInfo(TableSchema tableSchema, SerializationFormat serializationFormat) {
switch (serializationFormat) {
case Avro:
Schema schema = AvroSchemaConverter.convertToSchema(tableSchema.toRowDataType().getLogicalType());
AvroSchema avroSchema = AvroSchema.of(schema);
return avroSchema.getSchemaInfo();
case Json:
LogicalType logicalType = tableSchema.toRowDataType().getLogicalType();
String schemaString = convertToJsonSchemaString(logicalType);
JSONSchema<JsonNode> jsonSchema = JSONSchema.of("", schemaString, JsonNode.class);
return jsonSchema.getSchemaInfo();
default:
throw new NotImplementedException("Not supporting serialization format");
}
}

/**
* Converts Flink SQL {@link LogicalType} (can be nested) into a Json Schema String.
* <p>
* @param logicalType logical type
* @return String matching this logical type.
*/
public static String convertToJsonSchemaString(LogicalType logicalType) {
StringBuilder sb = new StringBuilder();
switch (logicalType.getTypeRoot()) {
case NULL:
sb.append("null");
break;
case BOOLEAN:
sb.append("boolean");
break;
case CHAR:
case VARCHAR:
sb.append("string");
break;
case BINARY:
case VARBINARY:
sb.append("string").append("\", \"").append("contentEncoding")
.append("\": \"").append("base64");
break;
case DECIMAL:
case TINYINT:
case SMALLINT:
case INTEGER:
case BIGINT:
case FLOAT:
case DOUBLE:
case INTERVAL_DAY_TIME:
case INTERVAL_YEAR_MONTH:
sb.append("number");
break;
case DATE:
sb.append("string").append("\", \"").append("format")
.append("\": \"").append("date");
break;
case TIME_WITHOUT_TIME_ZONE:
sb.append("string").append("\", \"").append("format")
.append("\": \"").append("time");
break;
case TIMESTAMP_WITHOUT_TIME_ZONE:
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
sb.append("string").append("\", \"").append("format")
.append("\": \"").append("date-time");
break;
case ARRAY:
sb.append("array");
break;
case MULTISET:
case MAP:
sb.append("object");
break;
case ROW:
RowType rowType = (RowType) logicalType;
List<String> fieldNames = rowType.getFieldNames();
sb.append("{").append("\"title\": ").append("\"Json Schema\", ").
append("\"type\": ").append("\"object\", ").
append("\"properties\": { ");
for (int i = 0; i < rowType.getFieldCount(); i++) {
String fieldName = fieldNames.get(i);
String fieldType = convertToJsonSchemaString(rowType.getTypeAt(i));
sb.append("\"").append(fieldName).append("\": {").
append("\"type\": \"").append(fieldType).append("\"},");
}
sb.deleteCharAt(sb.length() - 1).append("}}");
break;
case RAW:
default:
throw new UnsupportedOperationException("Unsupported type: " + logicalType);
}
return sb.toString();
}
}
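As a worked example of the string builder above (output inferred from the code, not copied from a test), converting ROW&lt;id INT, name STRING&gt; yields the flat JSON Schema shown in the trailing comment.

// imports assumed: org.apache.flink.table.types.logical.{RowType, LogicalType, IntType, VarCharType}
RowType rowType = RowType.of(
        new LogicalType[]{new IntType(), new VarCharType(VarCharType.MAX_LENGTH)},
        new String[]{"id", "name"});
String jsonSchema = PravegaSchemaUtils.convertToJsonSchemaString(rowType);
// {"title": "Json Schema", "type": "object", "properties": { "id": {"type": "number"},"name": {"type": "string"}}}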