diff --git a/flink-connector-kafka/archunit-violations/27a0a5e4-29c2-4069-b381-952746c90862 b/flink-connector-kafka/archunit-violations/27a0a5e4-29c2-4069-b381-952746c90862 index c7abba76e..e69de29bb 100644 --- a/flink-connector-kafka/archunit-violations/27a0a5e4-29c2-4069-b381-952746c90862 +++ b/flink-connector-kafka/archunit-violations/27a0a5e4-29c2-4069-b381-952746c90862 @@ -1 +0,0 @@ -Method calls method in (FlinkKafkaProducer.java:1327) \ No newline at end of file diff --git a/flink-connector-kafka/archunit-violations/86dfd459-67a9-4b26-9b5c-0b0bbf22681a b/flink-connector-kafka/archunit-violations/86dfd459-67a9-4b26-9b5c-0b0bbf22681a index 07efd19a9..8d50a018e 100644 --- a/flink-connector-kafka/archunit-violations/86dfd459-67a9-4b26-9b5c-0b0bbf22681a +++ b/flink-connector-kafka/archunit-violations/86dfd459-67a9-4b26-9b5c-0b0bbf22681a @@ -1,101 +1,5 @@ org.apache.flink.connector.kafka.sink.FlinkKafkaInternalProducerITCase does not satisfy: only one of the following predicates match:\ * reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.connector.kafka.sink.KafkaSinkITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.connector.kafka.sink.KafkaTransactionLogITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with 
@ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.connector.kafka.sink.KafkaWriterITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.connector.kafka.sink.KafkaWriterITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension or are , and of type MiniClusterTestEnvironment and annotated with @TestEnv\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.connector.kafka.sink.KafkaWriterFaultToleranceITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.connector.kafka.sink.KafkaWriterITCase does not satisfy: only one of the following predicates match: -* reside in a package 'org.apache.flink.runtime.*' and contain any fields 
that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension or are , and of type MiniClusterTestEnvironment and annotated with @TestEnv -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.connector.kafka.source.KafkaSourceITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.connector.kafka.source.KafkaSourceLegacyITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.streaming.connectors.kafka.FlinkKafkaInternalProducerITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with 
@ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducerITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.streaming.connectors.kafka.KafkaITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.streaming.connectors.kafka.KafkaProducerAtLeastOnceITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.streaming.connectors.kafka.KafkaProducerExactlyOnceITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type 
InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.streaming.connectors.kafka.shuffle.KafkaShuffleExactlyOnceITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.streaming.connectors.kafka.shuffle.KafkaShuffleITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.connector.kafka.sink.FlinkKafkaInternalProducerITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ * reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension or are , and of type MiniClusterTestEnvironment and annotated with @TestEnv\ * reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ * reside outside of package 'org.apache.flink.runtime.*' and is annotated with 
@ExtendWith with class MiniClusterExtension\ @@ -112,14 +16,13 @@ org.apache.flink.connector.kafka.sink.KafkaTransactionLogITCase does not satisfy * reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ * reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.connector.kafka.sink.KafkaWriterITCase does not satisfy: only one of the following predicates match:\ org.apache.flink.connector.kafka.sink.KafkaWriterFaultToleranceITCase does not satisfy: only one of the following predicates match:\ * reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ * reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension or are , and of type MiniClusterTestEnvironment and annotated with @TestEnv\ * reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ * reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.connector.kafka.sink.KafkaWriterFaultToleranceITCase does not satisfy: only one of the following predicates match:\ +org.apache.flink.connector.kafka.sink.KafkaWriterITCase does not satisfy: only one of the following predicates match:\ * reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ * reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension or are , and of type MiniClusterTestEnvironment and annotated with @TestEnv\ * reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ @@ -131,51 +34,3 @@ org.apache.flink.connector.kafka.source.KafkaSourceITCase does not satisfy: only * reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ * reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.connector.kafka.source.KafkaSourceLegacyITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with 
@RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension or are , and of type MiniClusterTestEnvironment and annotated with @TestEnv\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.streaming.connectors.kafka.FlinkKafkaInternalProducerITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension or are , and of type MiniClusterTestEnvironment and annotated with @TestEnv\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducerITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension or are , and of type MiniClusterTestEnvironment and annotated with @TestEnv\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.streaming.connectors.kafka.KafkaITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension or are , and of type MiniClusterTestEnvironment and annotated with @TestEnv\ -* reside in a package 'org.apache.flink.runtime.*' and 
is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.streaming.connectors.kafka.KafkaProducerAtLeastOnceITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension or are , and of type MiniClusterTestEnvironment and annotated with @TestEnv\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.streaming.connectors.kafka.KafkaProducerExactlyOnceITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension or are , and of type MiniClusterTestEnvironment and annotated with @TestEnv\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.streaming.connectors.kafka.shuffle.KafkaShuffleExactlyOnceITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension or are , and of type MiniClusterTestEnvironment and annotated with @TestEnv\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final 
and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -org.apache.flink.streaming.connectors.kafka.shuffle.KafkaShuffleITCase does not satisfy: only one of the following predicates match:\ -* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension or are , and of type MiniClusterTestEnvironment and annotated with @TestEnv\ -* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ -* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ - or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule diff --git a/flink-connector-kafka/archunit-violations/c0d94764-76a0-4c50-b617-70b1754c4612 b/flink-connector-kafka/archunit-violations/c0d94764-76a0-4c50-b617-70b1754c4612 index 20326f5ec..e41cb001a 100644 --- a/flink-connector-kafka/archunit-violations/c0d94764-76a0-4c50-b617-70b1754c4612 +++ b/flink-connector-kafka/archunit-violations/c0d94764-76a0-4c50-b617-70b1754c4612 @@ -1,35 +1,10 @@ Class is annotated with in (KafkaSourceEnumerator.java:0) Class is annotated with in (KafkaSourceEnumerator.java:0) -Class extends class in (FlinkKafkaProducer.java:0) -Class is annotated with in (FlinkKafkaProducer.java:0) -Class is annotated with in (FlinkKafkaProducer.java:0) -Class is annotated with in (FlinkKafkaProducer.java:0) -Class extends class in (FlinkKafkaProducer.java:0) -Class is annotated with in (FlinkKafkaProducer.java:0) -Class extends class in (FlinkKafkaProducer.java:0) -Class is annotated with in (FlinkKafkaProducer.java:0) -Class is annotated with in (KafkaShuffleFetcher.java:0) -Class is annotated with in (KafkaShuffleFetcher.java:0) -Class is annotated with in (KafkaShuffleFetcher.java:0) -Class is annotated with in (KafkaShuffleFetcher.java:0) Constructor (org.apache.flink.connector.kafka.dynamic.source.enumerator.subscriber.KafkaStreamSubscriber, org.apache.flink.connector.kafka.dynamic.metadata.KafkaMetadataService, org.apache.flink.api.connector.source.SplitEnumeratorContext, org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer, org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer, java.util.Properties, org.apache.flink.api.connector.source.Boundedness, org.apache.flink.connector.kafka.dynamic.source.enumerator.DynamicKafkaSourceEnumState, org.apache.flink.connector.kafka.dynamic.source.enumerator.StoppableKafkaEnumContextProxy$StoppableKafkaEnumContextProxyFactory)> is annotated with in (DynamicKafkaSourceEnumerator.java:0) Constructor (org.apache.flink.api.connector.source.SourceReaderContext, org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema, java.util.Properties)> calls constructor (int)> in (DynamicKafkaSourceReader.java:114) -Constructor ()> calls constructor ()> in (FlinkKafkaProducer.java:1733) -Constructor (java.util.Set)> is annotated with in 
(FlinkKafkaProducer.java:0) -Constructor (java.lang.String, long, short, org.apache.flink.streaming.connectors.kafka.internals.FlinkKafkaInternalProducer)> is annotated with in (FlinkKafkaProducer.java:0) -Constructor (java.lang.String, org.apache.flink.streaming.connectors.kafka.internals.FlinkKafkaInternalProducer)> is annotated with in (FlinkKafkaProducer.java:0) -Constructor (org.apache.flink.streaming.connectors.kafka.internals.FlinkKafkaInternalProducer)> is annotated with in (FlinkKafkaProducer.java:0) -Constructor ()> calls constructor ()> in (FlinkKafkaProducer.java:1879) -Constructor ()> calls constructor ()> in (FlinkKafkaProducer.java:1630) -Constructor (java.lang.Object, java.util.List, org.apache.flink.api.common.eventtime.WatermarkOutputMultiplexer, org.apache.flink.streaming.runtime.tasks.ProcessingTimeService, long)> has parameter of type in (AbstractFetcher.java:0) -Constructor (java.lang.Object, java.util.List, org.apache.flink.api.common.eventtime.WatermarkOutputMultiplexer, org.apache.flink.streaming.runtime.tasks.ProcessingTimeService, long)> has parameter of type in (AbstractFetcher.java:0) -Constructor (org.apache.flink.api.common.typeutils.TypeSerializer)> is annotated with in (KafkaShuffleFetcher.java:0) +Constructor (java.util.List, java.util.regex.Pattern, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.api.common.serialization.SerializationSchema, org.apache.flink.api.common.serialization.SerializationSchema, [Lorg.apache.flink.table.data.RowData$FieldGetter;, [Lorg.apache.flink.table.data.RowData$FieldGetter;, boolean, [I, boolean)> has parameter of type <[Lorg.apache.flink.table.data.RowData$FieldGetter;> in (DynamicKafkaRecordSerializationSchema.java:0) Field has generic type > with type argument depending on in (KafkaClusterMetricGroupManager.java:0) Field has type in (DynamicKafkaSourceReader.java:0) -Field has type in (AbstractFetcher.java:0) -Field has type in (AbstractFetcher.java:0) -Field has type in (KafkaShuffleFetcher.java:0) -Field has type in (FlinkKafkaShuffleProducer.java:0) Field has type <[Lorg.apache.flink.table.data.RowData$FieldGetter;> in (DynamicKafkaRecordSerializationSchema.java:0) Field has type <[Lorg.apache.flink.table.data.RowData$FieldGetter;> in (DynamicKafkaRecordSerializationSchema.java:0) Method is annotated with in (DynamicKafkaSource.java:0) @@ -61,150 +36,9 @@ Method is annotated with in (KafkaPartitionSplitReader.java:0) Method is annotated with in (KafkaSourceReader.java:0) Method is annotated with in (KafkaSourceReader.java:0) -Method calls method in (AbstractFetcher.java:604) -Method calls method in (AbstractFetcher.java:608) -Method calls method in (AbstractFetcher.java:608) -Method calls method in (AbstractFetcher.java:593) -Method calls method in (AbstractFetcher.java:593) -Method calls constructor ([B)> in (KafkaShuffleFetcher.java:240) -Method calls method in (KafkaShuffleFetcher.java:244) -Method calls method in (KafkaShuffleFetcher.java:245) -Method calls method in (KafkaShuffleFetcher.java:254) -Method calls method in (KafkaShuffleFetcher.java:251) -Method calls method in (KafkaShuffleFetcher.java:255) -Method calls method in (KafkaShuffleFetcher.java:238) -Method gets field in (KafkaShuffleFetcher.java:244) -Method gets field in (KafkaShuffleFetcher.java:245) -Method gets field in (KafkaShuffleFetcher.java:253) -Method gets field in (KafkaShuffleFetcher.java:250) -Method gets field in (KafkaShuffleFetcher.java:254) -Method is annotated with in (KafkaShuffleFetcher.java:0) -Method calls 
constructor (int)> in (FlinkKafkaShuffleProducer.java:186) -Method calls method in (FlinkKafkaShuffleProducer.java:205) -Method calls method in (FlinkKafkaShuffleProducer.java:204) -Method calls method in (FlinkKafkaShuffleProducer.java:190) -Method calls method in (FlinkKafkaShuffleProducer.java:193) -Method calls method in (FlinkKafkaShuffleProducer.java:195) -Method calls method in (FlinkKafkaShuffleProducer.java:196) -Method calls constructor (int)> in (FlinkKafkaShuffleProducer.java:212) -Method calls method in (FlinkKafkaShuffleProducer.java:225) -Method calls method in (FlinkKafkaShuffleProducer.java:224) -Method calls method in (FlinkKafkaShuffleProducer.java:216) -Method calls method in (FlinkKafkaShuffleProducer.java:217) -Method calls method in (FlinkKafkaShuffleProducer.java:218) -Method calls method in (FlinkKafkaShuffleProducer.java:219) Method has parameter of type <[Lorg.apache.flink.table.data.RowData$FieldGetter;> in (DynamicKafkaRecordSerializationSchema.java:0) Method calls method in (KafkaConnectorOptionsUtil.java:543) Method calls method in (KafkaConnectorOptionsUtil.java:587) Method calls method in (KafkaDynamicSink.java:386) Method has return type <[Lorg.apache.flink.table.data.RowData$FieldGetter;> in (KafkaDynamicSink.java:0) Method calls method in (KafkaDynamicSource.java:566) -Constructor (java.lang.String, org.apache.flink.connector.kafka.dynamic.metadata.KafkaMetadataService, org.apache.flink.api.connector.source.SplitEnumeratorContext, java.lang.Runnable)> calls constructor (java.lang.String)> in (StoppableKafkaEnumContextProxy.java:95) -Constructor (java.util.function.Function, org.apache.flink.api.common.serialization.SerializationSchema, org.apache.flink.api.common.serialization.SerializationSchema, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.connector.kafka.sink.HeaderProvider)> calls method in (KafkaRecordSerializationSchemaBuilder.java:322) -Constructor (java.util.function.Function, org.apache.flink.api.common.serialization.SerializationSchema, org.apache.flink.api.common.serialization.SerializationSchema, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.connector.kafka.sink.HeaderProvider)> calls method in (KafkaRecordSerializationSchemaBuilder.java:323) -Constructor (java.lang.Class, boolean, java.util.Map, java.util.function.Function)> calls method in (KafkaSerializerWrapper.java:51) -Constructor (java.lang.Class, boolean, java.util.Map, java.util.function.Function)> calls method in (KafkaSerializerWrapper.java:53) -Constructor (java.lang.Class, boolean, java.util.Map, java.util.function.Function)> calls method in (KafkaSerializerWrapper.java:54) -Constructor (org.apache.flink.connector.base.DeliveryGuarantee, java.util.Properties, java.lang.String, org.apache.flink.api.connector.sink2.Sink$InitContext, org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema, org.apache.flink.api.common.serialization.SerializationSchema$InitializationContext, java.util.Collection)> calls method in (KafkaWriter.java:134) -Constructor (org.apache.flink.connector.base.DeliveryGuarantee, java.util.Properties, java.lang.String, org.apache.flink.api.connector.sink2.Sink$InitContext, org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema, org.apache.flink.api.common.serialization.SerializationSchema$InitializationContext, java.util.Collection)> calls method in (KafkaWriter.java:135) -Constructor (org.apache.flink.connector.base.DeliveryGuarantee, java.util.Properties, java.lang.String, 
org.apache.flink.api.connector.sink2.Sink$InitContext, org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema, org.apache.flink.api.common.serialization.SerializationSchema$InitializationContext, java.util.Collection)> calls method in (KafkaWriter.java:136) -Constructor (org.apache.flink.connector.base.DeliveryGuarantee, java.util.Properties, java.lang.String, org.apache.flink.api.connector.sink2.Sink$InitContext, org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema, org.apache.flink.api.common.serialization.SerializationSchema$InitializationContext, java.util.Collection)> calls method in (KafkaWriter.java:137) -Constructor (org.apache.flink.connector.base.DeliveryGuarantee, java.util.Properties, java.lang.String, org.apache.flink.api.connector.sink2.Sink$InitContext, org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema, org.apache.flink.api.common.serialization.SerializationSchema$InitializationContext, java.util.Collection)> calls method in (KafkaWriter.java:138) -Constructor (org.apache.flink.connector.base.DeliveryGuarantee, java.util.Properties, java.lang.String, org.apache.flink.api.connector.sink2.Sink$InitContext, org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema, org.apache.flink.api.common.serialization.SerializationSchema$InitializationContext, java.util.Collection)> calls method in (KafkaWriter.java:173) -Constructor (java.lang.String)> calls method in (KafkaWriterState.java:28) -Constructor (java.lang.Object, java.util.function.Consumer)> calls method in (Recyclable.java:31) -Constructor (java.lang.Object, java.util.function.Consumer)> calls method in (Recyclable.java:32) -Constructor (int, int, java.util.function.Function, java.util.function.Consumer)> calls method in (TransactionAborter.java:60) -Constructor (java.util.Set)> calls method in (FlinkKafkaProducer.java:1591) -Constructor (java.lang.Object, java.util.List, org.apache.flink.api.common.eventtime.WatermarkOutputMultiplexer, org.apache.flink.streaming.runtime.tasks.ProcessingTimeService, long)> calls method in (AbstractFetcher.java:593) -Constructor (java.lang.Object, java.util.List, org.apache.flink.api.common.eventtime.WatermarkOutputMultiplexer, org.apache.flink.streaming.runtime.tasks.ProcessingTimeService, long)> calls method in (AbstractFetcher.java:595) -Constructor (org.apache.flink.streaming.connectors.kafka.internals.KafkaConsumerThread, org.apache.flink.streaming.connectors.kafka.internals.KafkaCommitCallback)> calls method in (KafkaConsumerThread.java:540) -Constructor (int, org.apache.flink.api.common.serialization.DeserializationSchema, [I, org.apache.flink.api.common.serialization.DeserializationSchema, [I, boolean, [Lorg.apache.flink.streaming.connectors.kafka.table.DynamicKafkaDeserializationSchema$MetadataConverter;, org.apache.flink.api.common.typeinfo.TypeInformation, boolean)> calls method in (DynamicKafkaDeserializationSchema.java:72) -Constructor (java.util.List, java.util.regex.Pattern, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.api.common.serialization.SerializationSchema, org.apache.flink.api.common.serialization.SerializationSchema, [Lorg.apache.flink.table.data.RowData$FieldGetter;, [Lorg.apache.flink.table.data.RowData$FieldGetter;, boolean, [I, boolean)> calls method in (DynamicKafkaRecordSerializationSchema.java:71) -Constructor (java.util.List, java.util.regex.Pattern, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.api.common.serialization.SerializationSchema, 
org.apache.flink.api.common.serialization.SerializationSchema, [Lorg.apache.flink.table.data.RowData$FieldGetter;, [Lorg.apache.flink.table.data.RowData$FieldGetter;, boolean, [I, boolean)> calls method in (DynamicKafkaRecordSerializationSchema.java:75) -Constructor (java.util.List, java.util.regex.Pattern, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.api.common.serialization.SerializationSchema, org.apache.flink.api.common.serialization.SerializationSchema, [Lorg.apache.flink.table.data.RowData$FieldGetter;, [Lorg.apache.flink.table.data.RowData$FieldGetter;, boolean, [I, boolean)> calls method in (DynamicKafkaRecordSerializationSchema.java:87) -Constructor (java.util.List, java.util.regex.Pattern, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.api.common.serialization.SerializationSchema, org.apache.flink.api.common.serialization.SerializationSchema, [Lorg.apache.flink.table.data.RowData$FieldGetter;, [Lorg.apache.flink.table.data.RowData$FieldGetter;, boolean, [I, boolean)> has parameter of type <[Lorg.apache.flink.table.data.RowData$FieldGetter;> in (DynamicKafkaRecordSerializationSchema.java:0) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.EncodingFormat, org.apache.flink.table.connector.format.EncodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.connector.base.DeliveryGuarantee, boolean, org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode, java.lang.Integer, java.lang.String)> calls method in (KafkaDynamicSink.java:181) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.EncodingFormat, org.apache.flink.table.connector.format.EncodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.connector.base.DeliveryGuarantee, boolean, org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode, java.lang.Integer, java.lang.String)> calls method in (KafkaDynamicSink.java:161) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.EncodingFormat, org.apache.flink.table.connector.format.EncodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.connector.base.DeliveryGuarantee, boolean, org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode, java.lang.Integer, java.lang.String)> calls method in (KafkaDynamicSink.java:163) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.EncodingFormat, org.apache.flink.table.connector.format.EncodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.connector.base.DeliveryGuarantee, boolean, org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode, java.lang.Integer, java.lang.String)> calls method in (KafkaDynamicSink.java:166) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.types.DataType, 
org.apache.flink.table.connector.format.EncodingFormat, org.apache.flink.table.connector.format.EncodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.connector.base.DeliveryGuarantee, boolean, org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode, java.lang.Integer, java.lang.String)> calls method in (KafkaDynamicSink.java:167) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.EncodingFormat, org.apache.flink.table.connector.format.EncodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.connector.base.DeliveryGuarantee, boolean, org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode, java.lang.Integer, java.lang.String)> calls method in (KafkaDynamicSink.java:168) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.EncodingFormat, org.apache.flink.table.connector.format.EncodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.connector.base.DeliveryGuarantee, boolean, org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode, java.lang.Integer, java.lang.String)> calls method in (KafkaDynamicSink.java:176) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.EncodingFormat, org.apache.flink.table.connector.format.EncodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.connector.kafka.sink.KafkaPartitioner, org.apache.flink.connector.base.DeliveryGuarantee, boolean, org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode, java.lang.Integer, java.lang.String)> calls method in (KafkaDynamicSink.java:179) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.DecodingFormat, org.apache.flink.table.connector.format.DecodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.streaming.connectors.kafka.config.StartupMode, java.util.Map, long, org.apache.flink.streaming.connectors.kafka.config.BoundedMode, java.util.Map, long, boolean, java.lang.String)> calls method in (KafkaDynamicSource.java:210) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.DecodingFormat, org.apache.flink.table.connector.format.DecodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.streaming.connectors.kafka.config.StartupMode, java.util.Map, long, org.apache.flink.streaming.connectors.kafka.config.BoundedMode, java.util.Map, long, boolean, java.lang.String)> calls method in (KafkaDynamicSource.java:194) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.DecodingFormat, org.apache.flink.table.connector.format.DecodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.streaming.connectors.kafka.config.StartupMode, java.util.Map, long, org.apache.flink.streaming.connectors.kafka.config.BoundedMode, 
java.util.Map, long, boolean, java.lang.String)> calls method in (KafkaDynamicSource.java:198) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.DecodingFormat, org.apache.flink.table.connector.format.DecodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.streaming.connectors.kafka.config.StartupMode, java.util.Map, long, org.apache.flink.streaming.connectors.kafka.config.BoundedMode, java.util.Map, long, boolean, java.lang.String)> calls method in (KafkaDynamicSource.java:201) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.DecodingFormat, org.apache.flink.table.connector.format.DecodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.streaming.connectors.kafka.config.StartupMode, java.util.Map, long, org.apache.flink.streaming.connectors.kafka.config.BoundedMode, java.util.Map, long, boolean, java.lang.String)> calls method in (KafkaDynamicSource.java:203) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.DecodingFormat, org.apache.flink.table.connector.format.DecodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.streaming.connectors.kafka.config.StartupMode, java.util.Map, long, org.apache.flink.streaming.connectors.kafka.config.BoundedMode, java.util.Map, long, boolean, java.lang.String)> calls method in (KafkaDynamicSource.java:216) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.DecodingFormat, org.apache.flink.table.connector.format.DecodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.streaming.connectors.kafka.config.StartupMode, java.util.Map, long, org.apache.flink.streaming.connectors.kafka.config.BoundedMode, java.util.Map, long, boolean, java.lang.String)> calls method in (KafkaDynamicSource.java:218) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.DecodingFormat, org.apache.flink.table.connector.format.DecodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.streaming.connectors.kafka.config.StartupMode, java.util.Map, long, org.apache.flink.streaming.connectors.kafka.config.BoundedMode, java.util.Map, long, boolean, java.lang.String)> calls method in (KafkaDynamicSource.java:220) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.DecodingFormat, org.apache.flink.table.connector.format.DecodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.streaming.connectors.kafka.config.StartupMode, java.util.Map, long, org.apache.flink.streaming.connectors.kafka.config.BoundedMode, java.util.Map, long, boolean, java.lang.String)> calls method in (KafkaDynamicSource.java:224) -Constructor (org.apache.flink.table.types.DataType, org.apache.flink.table.connector.format.DecodingFormat, org.apache.flink.table.connector.format.DecodingFormat, [I, [I, java.lang.String, java.util.List, java.util.regex.Pattern, java.util.Properties, org.apache.flink.streaming.connectors.kafka.config.StartupMode, java.util.Map, long, org.apache.flink.streaming.connectors.kafka.config.BoundedMode, java.util.Map, long, boolean, java.lang.String)> calls method in 
(KafkaDynamicSource.java:226) -Constructor (org.apache.flink.connector.kafka.sink.TwoPhaseCommittingStatefulSink$PrecommittingStatefulSinkWriter, org.apache.flink.table.types.DataType, [I, org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode, org.apache.flink.api.common.operators.ProcessingTimeService, java.util.function.Function)> calls method in (ReducingUpsertWriter.java:70) -Constructor (org.apache.flink.connector.kafka.sink.TwoPhaseCommittingStatefulSink$PrecommittingStatefulSinkWriter, org.apache.flink.table.types.DataType, [I, org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode, org.apache.flink.api.common.operators.ProcessingTimeService, java.util.function.Function)> calls method in (ReducingUpsertWriter.java:71) -Constructor (org.apache.flink.connector.kafka.sink.TwoPhaseCommittingStatefulSink$PrecommittingStatefulSinkWriter, org.apache.flink.table.types.DataType, [I, org.apache.flink.streaming.connectors.kafka.table.SinkBufferFlushMode, org.apache.flink.api.common.operators.ProcessingTimeService, java.util.function.Function)> calls method in (ReducingUpsertWriter.java:72) -Method calls method in (DynamicKafkaSourceBuilder.java:291) -Method calls method in (DynamicKafkaSourceBuilder.java:293) -Method calls method in (DynamicKafkaSourceBuilder.java:295) -Method calls method in (DynamicKafkaSourceBuilder.java:299) -Method calls method in (DynamicKafkaSourceBuilder.java:100) -Method calls method in (DynamicKafkaSourceBuilder.java:73) -Method calls method in (DynamicKafkaSourceBuilder.java:86) -Method calls method in (DynamicKafkaSourceEnumStateSerializer.java:142) -Method calls method in (DynamicKafkaSourceEnumerator.java:514) -Method calls method in (StoppableKafkaEnumContextProxy.java:237) -Method calls method in (StoppableKafkaEnumContextProxy.java:259) -Method calls method in (DynamicKafkaSourceReader.java:418) -Method calls method in (DynamicKafkaSourceReader.java:228) -Method calls method in (FlinkKafkaInternalProducer.java:100) -Method calls method in (FlinkKafkaInternalProducer.java:109) -Method calls method in (FlinkKafkaInternalProducer.java:296) -Method calls method in (FlinkKafkaInternalProducer.java:297) -Method calls method in (FlinkKafkaInternalProducer.java:174) -Method calls method in (KafkaRecordSerializationSchemaBuilder.java:268) -Method calls method in (KafkaRecordSerializationSchemaBuilder.java:269) -Method calls method in (KafkaRecordSerializationSchemaBuilder.java:283) -Method calls method in (KafkaRecordSerializationSchemaBuilder.java:279) -Method calls method in (KafkaRecordSerializationSchemaBuilder.java:218) -Method calls method in (KafkaRecordSerializationSchemaBuilder.java:154) -Method calls method in (KafkaRecordSerializationSchemaBuilder.java:112) -Method calls method in (KafkaRecordSerializationSchemaBuilder.java:99) -Method calls method in (KafkaRecordSerializationSchemaBuilder.java:124) -Method calls method in (KafkaRecordSerializationSchemaBuilder.java:123) -Method calls method in (KafkaRecordSerializationSchemaBuilder.java:139) -Method calls method in (KafkaRecordSerializationSchemaBuilder.java:137) -Method calls method in (KafkaRecordSerializationSchemaBuilder.java:204) -Method calls method in (KafkaSerializerWrapper.java:71) -Method calls method in (KafkaSerializerWrapper.java:88) -Method calls method in (KafkaSinkBuilder.java:194) -Method calls method in (KafkaSinkBuilder.java:202) -Method calls method in (KafkaSinkBuilder.java:198) -Method calls method in (KafkaSinkBuilder.java:111) -Method calls method in 
(KafkaSinkBuilder.java:97) -Method calls method in (KafkaSinkBuilder.java:123) -Method calls method in (KafkaSinkBuilder.java:133) -Method calls method in (KafkaSinkBuilder.java:151) -Method calls method in (KafkaSinkBuilder.java:175) -Method calls method in (KafkaSinkBuilder.java:176) -Method calls method in (KafkaWriter.java:244) -Method calls method in (KafkaWriter.java:245) -Method calls method in (KafkaWriter.java:246) -Method calls method in (KafkaWriter.java:311) -Method calls method in (Recyclable.java:36) -Method calls method in (KafkaSourceBuilder.java:513) -Method calls method in (KafkaSourceBuilder.java:518) -Method calls method in (KafkaSourceBuilder.java:522) -Method calls method in (KafkaSourceBuilder.java:524) -Method calls method in (KafkaSourceBuilder.java:203) -Method calls method in (ReaderHandledOffsetsInitializer.java:75) -Method calls method in (SpecifiedOffsetsInitializer.java:105) -Method calls method in (KafkaPartitionSplitReader.java:540) -Method calls method in (KafkaPartitionSplitReader.java:359) -Method calls method in (KafkaValueOnlyDeserializerWrapper.java:65) -Method calls method in (KafkaShuffleFetcher.java:280) -Method calls method in (KafkaConnectorOptionsUtil.java:500) -Method calls method in (KafkaConnectorOptionsUtil.java:567) -Method calls method in (KafkaConnectorOptionsUtil.java:480) -Method calls method in (ReducingUpsertWriter.java:177) diff --git a/flink-connector-kafka/archunit-violations/d853eb69-8c04-4246-9a5e-4f5911286b1d b/flink-connector-kafka/archunit-violations/d853eb69-8c04-4246-9a5e-4f5911286b1d index dfb76a0c5..5434e6d42 100644 --- a/flink-connector-kafka/archunit-violations/d853eb69-8c04-4246-9a5e-4f5911286b1d +++ b/flink-connector-kafka/archunit-violations/d853eb69-8c04-4246-9a5e-4f5911286b1d @@ -1,5 +1,3 @@ org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema.open(org.apache.flink.api.common.serialization.SerializationSchema$InitializationContext, org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema$KafkaSinkContext): Argument leaf type org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema$KafkaSinkContext does not satisfy: reside outside of package 'org.apache.flink..' or reside in any package ['..shaded..'] or annotated with @Public or annotated with @PublicEvolving or annotated with @Deprecated org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema.serialize(java.lang.Object, org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema$KafkaSinkContext, java.lang.Long): Argument leaf type org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema$KafkaSinkContext does not satisfy: reside outside of package 'org.apache.flink..' or reside in any package ['..shaded..'] or annotated with @Public or annotated with @PublicEvolving or annotated with @Deprecated org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer.getPartitionOffsets(java.util.Collection, org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer$PartitionOffsetsRetriever): Argument leaf type org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer$PartitionOffsetsRetriever does not satisfy: reside outside of package 'org.apache.flink..' 
or reside in any package ['..shaded..'] or annotated with @Public or annotated with @PublicEvolving or annotated with @Deprecated -org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer.invoke(org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer$KafkaTransactionState, java.lang.Object, org.apache.flink.streaming.api.functions.sink.SinkFunction$Context): Argument leaf type org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer$KafkaTransactionState does not satisfy: reside outside of package 'org.apache.flink..' or reside in any package ['..shaded..'] or annotated with @Public or annotated with @PublicEvolving or annotated with @Deprecated -org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition.dropLeaderData(java.util.List): Argument leaf type org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartitionLeader does not satisfy: reside outside of package 'org.apache.flink..' or reside in any package ['..shaded..'] or annotated with @Public or annotated with @PublicEvolving or annotated with @Deprecated diff --git a/flink-connector-kafka/archunit-violations/e0624cac-4ea1-4bf8-879a-ecedb41ce334 b/flink-connector-kafka/archunit-violations/e0624cac-4ea1-4bf8-879a-ecedb41ce334 index b591e33c6..dfb76a0c5 100644 --- a/flink-connector-kafka/archunit-violations/e0624cac-4ea1-4bf8-879a-ecedb41ce334 +++ b/flink-connector-kafka/archunit-violations/e0624cac-4ea1-4bf8-879a-ecedb41ce334 @@ -2,4 +2,4 @@ org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema.open(org.ap org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema.serialize(java.lang.Object, org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema$KafkaSinkContext, java.lang.Long): Argument leaf type org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema$KafkaSinkContext does not satisfy: reside outside of package 'org.apache.flink..' or reside in any package ['..shaded..'] or annotated with @Public or annotated with @PublicEvolving or annotated with @Deprecated org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer.getPartitionOffsets(java.util.Collection, org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer$PartitionOffsetsRetriever): Argument leaf type org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer$PartitionOffsetsRetriever does not satisfy: reside outside of package 'org.apache.flink..' or reside in any package ['..shaded..'] or annotated with @Public or annotated with @PublicEvolving or annotated with @Deprecated org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer.invoke(org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer$KafkaTransactionState, java.lang.Object, org.apache.flink.streaming.api.functions.sink.SinkFunction$Context): Argument leaf type org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer$KafkaTransactionState does not satisfy: reside outside of package 'org.apache.flink..' or reside in any package ['..shaded..'] or annotated with @Public or annotated with @PublicEvolving or annotated with @Deprecated -org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition.dropLeaderData(java.util.List): Argument leaf type org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartitionLeader does not satisfy: reside outside of package 'org.apache.flink..' 
or reside in any package ['..shaded..'] or annotated with @Public or annotated with @PublicEvolving or annotated with @Deprecated \ No newline at end of file +org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition.dropLeaderData(java.util.List): Argument leaf type org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartitionLeader does not satisfy: reside outside of package 'org.apache.flink..' or reside in any package ['..shaded..'] or annotated with @Public or annotated with @PublicEvolving or annotated with @Deprecated diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/sink/KafkaRecordSerializationSchemaBuilder.java b/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/sink/KafkaRecordSerializationSchemaBuilder.java index 0fba3a364..cc1f803c4 100644 --- a/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/sink/KafkaRecordSerializationSchemaBuilder.java +++ b/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/sink/KafkaRecordSerializationSchemaBuilder.java @@ -30,7 +30,6 @@ import org.apache.flink.connector.kafka.lineage.TypeDatasetFacet; import org.apache.flink.connector.kafka.lineage.TypeDatasetFacetProvider; import org.apache.flink.connector.kafka.source.KafkaSource; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; import com.google.common.reflect.TypeToken; import org.apache.kafka.clients.producer.ProducerRecord; @@ -103,20 +102,6 @@ public class KafkaRecordSerializationSchemaBuilder { @Nullable private SerializationSchema keySerializationSchema; @Nullable private HeaderProvider headerProvider; - /** - * Sets a custom partitioner determining the target partition of the target topic. - * - * @param partitioner - * @return {@code this} - * @deprecated use {@link #setPartitioner(KafkaPartitioner)} - */ - public KafkaRecordSerializationSchemaBuilder setPartitioner( - FlinkKafkaPartitioner partitioner) { - KafkaRecordSerializationSchemaBuilder self = self(); - self.partitioner = checkNotNull(partitioner); - return self; - } - /** * Sets a custom partitioner determining the target partition of the target topic. * diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/sink/KafkaSinkBuilder.java b/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/sink/KafkaSinkBuilder.java index f0c20cfc0..5ac18709e 100644 --- a/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/sink/KafkaSinkBuilder.java +++ b/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/sink/KafkaSinkBuilder.java @@ -98,20 +98,6 @@ public KafkaSinkBuilder setDeliveryGuarantee(DeliveryGuarantee deliveryGuara return this; } - /** - * Sets the wanted the {@link DeliveryGuarantee}. The default delivery guarantee is {@link - * #deliveryGuarantee}. - * - * @param deliveryGuarantee - * @return {@link KafkaSinkBuilder} - * @deprecated Will be removed in future versions. Use {@link #setDeliveryGuarantee} instead. - */ - @Deprecated - public KafkaSinkBuilder setDeliverGuarantee(DeliveryGuarantee deliveryGuarantee) { - this.deliveryGuarantee = checkNotNull(deliveryGuarantee, "deliveryGuarantee"); - return this; - } - /** * Sets the configuration which used to instantiate all used {@link * org.apache.kafka.clients.producer.KafkaProducer}. 
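The two hunks above remove the deprecated KafkaRecordSerializationSchemaBuilder#setPartitioner(FlinkKafkaPartitioner) and KafkaSinkBuilder#setDeliverGuarantee(DeliveryGuarantee) entry points; per the removed @deprecated notes, their non-deprecated counterparts (setPartitioner(KafkaPartitioner) and setDeliveryGuarantee(DeliveryGuarantee)) stay in place. Below is a minimal sketch, not part of the patch, of a sink wired up through the remaining builder methods, assuming the current KafkaSink / KafkaRecordSerializationSchema builder API; the bootstrap server and topic name are placeholders.

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;

public class KafkaSinkMigrationSketch {
    // Sketch only: builds a KafkaSink<String> with the builder methods that remain
    // after this change. Broker address and topic name are placeholders.
    public static KafkaSink<String> buildSink() {
        return KafkaSink.<String>builder()
                .setBootstrapServers("localhost:9092")                 // placeholder
                .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE) // replaces the removed setDeliverGuarantee(...)
                .setRecordSerializer(
                        KafkaRecordSerializationSchema.builder()
                                .setTopic("output-topic")              // placeholder
                                .setValueSerializationSchema(new SimpleStringSchema())
                                // a custom partitioner, if needed, now goes through
                                // setPartitioner(KafkaPartitioner) instead of FlinkKafkaPartitioner
                                .build())
                .build();
    }
}

Existing pipelines should only need the one-for-one method rename (setDeliverGuarantee to setDeliveryGuarantee) and the partitioner type swap; the rest of the builder chain is unchanged by this patch.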
diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumer.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumer.java deleted file mode 100644 index 1e506a8f2..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumer.java +++ /dev/null @@ -1,342 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.annotation.PublicEvolving; -import org.apache.flink.api.common.eventtime.WatermarkStrategy; -import org.apache.flink.api.common.serialization.DeserializationSchema; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; -import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; -import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; -import org.apache.flink.streaming.connectors.kafka.internals.AbstractPartitionDiscoverer; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaDeserializationSchemaWrapper; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaFetcher; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaPartitionDiscoverer; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicsDescriptor; -import org.apache.flink.util.PropertiesUtil; -import org.apache.flink.util.SerializedValue; - -import org.apache.kafka.clients.consumer.ConsumerConfig; -import org.apache.kafka.clients.consumer.KafkaConsumer; -import org.apache.kafka.clients.consumer.OffsetAndTimestamp; -import org.apache.kafka.common.TopicPartition; -import org.apache.kafka.common.serialization.ByteArrayDeserializer; - -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.regex.Pattern; - -import static org.apache.flink.util.Preconditions.checkNotNull; -import static org.apache.flink.util.PropertiesUtil.getBoolean; -import static org.apache.flink.util.PropertiesUtil.getLong; - -/** - * The Flink Kafka Consumer is a streaming data source that pulls a parallel data stream from Apache - * Kafka. The consumer can run in multiple parallel instances, each of which will pull data from one - * or more Kafka partitions. - * - *

The Flink Kafka Consumer participates in checkpointing and guarantees that no data is lost - * during a failure, and that the computation processes elements "exactly once". (Note: These - * guarantees naturally assume that Kafka itself does not lose any data.) - * - *

Please note that Flink snapshots the offsets internally as part of its distributed - * checkpoints. The offsets committed to Kafka are only to bring the outside view of progress in - * sync with Flink's view of the progress. That way, monitoring and other jobs can get a view of how - * far the Flink Kafka consumer has consumed a topic. - * - *

Please refer to Kafka's documentation for the available configuration properties: - * http://kafka.apache.org/documentation.html#newconsumerconfigs - */ -@PublicEvolving -@Deprecated -public class FlinkKafkaConsumer extends FlinkKafkaConsumerBase { - - private static final long serialVersionUID = 1L; - - /** Configuration key to change the polling timeout. * */ - public static final String KEY_POLL_TIMEOUT = "flink.poll-timeout"; - - /** - * From Kafka's Javadoc: The time, in milliseconds, spent waiting in poll if data is not - * available. If 0, returns immediately with any records that are available now. - */ - public static final long DEFAULT_POLL_TIMEOUT = 100L; - - // ------------------------------------------------------------------------ - - /** User-supplied properties for Kafka. * */ - protected final Properties properties; - - /** - * From Kafka's Javadoc: The time, in milliseconds, spent waiting in poll if data is not - * available. If 0, returns immediately with any records that are available now - */ - protected final long pollTimeout; - - // ------------------------------------------------------------------------ - - /** - * Creates a new Kafka streaming source consumer. - * - * @param topic The name of the topic that should be consumed. - * @param valueDeserializer The de-/serializer used to convert between Kafka's byte messages and - * Flink's objects. - * @param props - */ - public FlinkKafkaConsumer( - String topic, DeserializationSchema valueDeserializer, Properties props) { - this(Collections.singletonList(topic), valueDeserializer, props); - } - - /** - * Creates a new Kafka streaming source consumer. - * - *

This constructor allows passing a {@see KafkaDeserializationSchema} for reading key/value - * pairs, offsets, and topic names from Kafka. - * - * @param topic The name of the topic that should be consumed. - * @param deserializer The keyed de-/serializer used to convert between Kafka's byte messages - * and Flink's objects. - * @param props - */ - public FlinkKafkaConsumer( - String topic, KafkaDeserializationSchema deserializer, Properties props) { - this(Collections.singletonList(topic), deserializer, props); - } - - /** - * Creates a new Kafka streaming source consumer. - * - *

This constructor allows passing multiple topics to the consumer. - * - * @param topics The Kafka topics to read from. - * @param deserializer The de-/serializer used to convert between Kafka's byte messages and - * Flink's objects. - * @param props - */ - public FlinkKafkaConsumer( - List topics, DeserializationSchema deserializer, Properties props) { - this(topics, new KafkaDeserializationSchemaWrapper<>(deserializer), props); - } - - /** - * Creates a new Kafka streaming source consumer. - * - *

This constructor allows passing multiple topics and a key/value deserialization schema. - * - * @param topics The Kafka topics to read from. - * @param deserializer The keyed de-/serializer used to convert between Kafka's byte messages - * and Flink's objects. - * @param props - */ - public FlinkKafkaConsumer( - List topics, KafkaDeserializationSchema deserializer, Properties props) { - this(topics, null, deserializer, props); - } - - /** - * Creates a new Kafka streaming source consumer. Use this constructor to subscribe to multiple - * topics based on a regular expression pattern. - * - *

If partition discovery is enabled (by setting a non-negative value for {@link - * FlinkKafkaConsumer#KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS} in the properties), topics with - * names matching the pattern will also be subscribed to as they are created on the fly. - * - * @param subscriptionPattern The regular expression for a pattern of topic names to subscribe - * to. - * @param valueDeserializer The de-/serializer used to convert between Kafka's byte messages and - * Flink's objects. - * @param props - */ - public FlinkKafkaConsumer( - Pattern subscriptionPattern, - DeserializationSchema valueDeserializer, - Properties props) { - this( - null, - subscriptionPattern, - new KafkaDeserializationSchemaWrapper<>(valueDeserializer), - props); - } - - /** - * Creates a new Kafka streaming source consumer. Use this constructor to subscribe to multiple - * topics based on a regular expression pattern. - * - *

If partition discovery is enabled (by setting a non-negative value for {@link - * FlinkKafkaConsumer#KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS} in the properties), topics with - * names matching the pattern will also be subscribed to as they are created on the fly. - * - *

This constructor allows passing a {@see KafkaDeserializationSchema} for reading key/value - * pairs, offsets, and topic names from Kafka. - * - * @param subscriptionPattern The regular expression for a pattern of topic names to subscribe - * to. - * @param deserializer The keyed de-/serializer used to convert between Kafka's byte messages - * and Flink's objects. - * @param props - */ - public FlinkKafkaConsumer( - Pattern subscriptionPattern, - KafkaDeserializationSchema deserializer, - Properties props) { - this(null, subscriptionPattern, deserializer, props); - } - - private FlinkKafkaConsumer( - List topics, - Pattern subscriptionPattern, - KafkaDeserializationSchema deserializer, - Properties props) { - - super( - topics, - subscriptionPattern, - deserializer, - getLong( - checkNotNull(props, "props"), - KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS, - PARTITION_DISCOVERY_DISABLED), - !getBoolean(props, KEY_DISABLE_METRICS, false)); - - this.properties = props; - setDeserializer(this.properties); - - // configure the polling timeout - try { - if (properties.containsKey(KEY_POLL_TIMEOUT)) { - this.pollTimeout = Long.parseLong(properties.getProperty(KEY_POLL_TIMEOUT)); - } else { - this.pollTimeout = DEFAULT_POLL_TIMEOUT; - } - } catch (Exception e) { - throw new IllegalArgumentException( - "Cannot parse poll timeout for '" + KEY_POLL_TIMEOUT + '\'', e); - } - } - - @Override - protected AbstractFetcher createFetcher( - SourceContext sourceContext, - Map assignedPartitionsWithInitialOffsets, - SerializedValue> watermarkStrategy, - StreamingRuntimeContext runtimeContext, - OffsetCommitMode offsetCommitMode, - MetricGroup consumerMetricGroup, - boolean useMetrics) - throws Exception { - - // make sure that auto commit is disabled when our offset commit mode is ON_CHECKPOINTS; - // this overwrites whatever setting the user configured in the properties - adjustAutoCommitConfig(properties, offsetCommitMode); - - return new KafkaFetcher<>( - sourceContext, - assignedPartitionsWithInitialOffsets, - watermarkStrategy, - runtimeContext.getProcessingTimeService(), - runtimeContext.getExecutionConfig().getAutoWatermarkInterval(), - runtimeContext.getUserCodeClassLoader(), - runtimeContext.getTaskNameWithSubtasks(), - deserializer, - properties, - pollTimeout, - runtimeContext.getMetricGroup(), - consumerMetricGroup, - useMetrics); - } - - @Override - protected AbstractPartitionDiscoverer createPartitionDiscoverer( - KafkaTopicsDescriptor topicsDescriptor, - int indexOfThisSubtask, - int numParallelSubtasks) { - - return new KafkaPartitionDiscoverer( - topicsDescriptor, indexOfThisSubtask, numParallelSubtasks, properties); - } - - @Override - protected Map fetchOffsetsWithTimestamp( - Collection partitions, long timestamp) { - - Map partitionOffsetsRequest = new HashMap<>(partitions.size()); - for (KafkaTopicPartition partition : partitions) { - partitionOffsetsRequest.put( - new TopicPartition(partition.getTopic(), partition.getPartition()), timestamp); - } - - final Map result = new HashMap<>(partitions.size()); - // use a short-lived consumer to fetch the offsets; - // this is ok because this is a one-time operation that happens only on startup - try (KafkaConsumer consumer = new KafkaConsumer(properties)) { - for (Map.Entry partitionToOffset : - consumer.offsetsForTimes(partitionOffsetsRequest).entrySet()) { - - result.put( - new KafkaTopicPartition( - partitionToOffset.getKey().topic(), - partitionToOffset.getKey().partition()), - (partitionToOffset.getValue() == null) - ? 
null - : partitionToOffset.getValue().offset()); - } - } - return result; - } - - @Override - protected boolean getIsAutoCommitEnabled() { - return getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true) - && PropertiesUtil.getLong( - properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) - > 0; - } - - /** - * Makes sure that the ByteArrayDeserializer is registered in the Kafka properties. - * - * @param props The Kafka properties to register the serializer in. - */ - private static void setDeserializer(Properties props) { - final String deSerName = ByteArrayDeserializer.class.getName(); - - Object keyDeSer = props.get(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG); - Object valDeSer = props.get(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG); - - if (keyDeSer != null && !keyDeSer.equals(deSerName)) { - LOG.warn( - "Ignoring configured key DeSerializer ({})", - ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG); - } - if (valDeSer != null && !valDeSer.equals(deSerName)) { - LOG.warn( - "Ignoring configured value DeSerializer ({})", - ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG); - } - - props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, deSerName); - props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, deSerName); - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumerBase.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumerBase.java deleted file mode 100644 index 7a85b434e..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumerBase.java +++ /dev/null @@ -1,1228 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.annotation.VisibleForTesting; -import org.apache.flink.api.common.ExecutionConfig; -import org.apache.flink.api.common.eventtime.WatermarkStrategy; -import org.apache.flink.api.common.serialization.RuntimeContextInitializationContextAdapters; -import org.apache.flink.api.common.state.CheckpointListener; -import org.apache.flink.api.common.state.ListState; -import org.apache.flink.api.common.state.ListStateDescriptor; -import org.apache.flink.api.common.state.OperatorStateStore; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.api.common.typeutils.base.LongSerializer; -import org.apache.flink.api.java.ClosureCleaner; -import org.apache.flink.api.java.tuple.Tuple2; -import org.apache.flink.api.java.typeutils.ResultTypeQueryable; -import org.apache.flink.api.java.typeutils.runtime.TupleSerializer; -import org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.metrics.Counter; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.runtime.state.FunctionInitializationContext; -import org.apache.flink.runtime.state.FunctionSnapshotContext; -import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; -import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; -import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; -import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; -import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; -import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; -import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitModes; -import org.apache.flink.streaming.connectors.kafka.config.StartupMode; -import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; -import org.apache.flink.streaming.connectors.kafka.internals.AbstractPartitionDiscoverer; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaCommitCallback; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartitionAssigner; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartitionStateSentinel; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicsDescriptor; -import org.apache.flink.streaming.runtime.operators.util.AssignerWithPeriodicWatermarksAdapter; -import org.apache.flink.streaming.runtime.operators.util.AssignerWithPunctuatedWatermarksAdapter; -import org.apache.flink.util.ExceptionUtils; -import org.apache.flink.util.SerializedValue; - -import org.apache.commons.collections.map.LinkedMap; -import org.apache.kafka.clients.consumer.ConsumerConfig; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.TreeMap; -import java.util.concurrent.atomic.AtomicReference; -import java.util.regex.Pattern; - -import static org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaConsumerMetricConstants.COMMITS_FAILED_METRICS_COUNTER; -import static 
org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaConsumerMetricConstants.COMMITS_SUCCEEDED_METRICS_COUNTER; -import static org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaConsumerMetricConstants.KAFKA_CONSUMER_METRICS_GROUP; -import static org.apache.flink.util.Preconditions.checkArgument; -import static org.apache.flink.util.Preconditions.checkNotNull; - -/** - * Base class of all Flink Kafka Consumer data sources. This implements the common behavior across - * all Kafka versions. - * - *

The Kafka version specific behavior is defined mainly in the specific subclasses of the {@link - * AbstractFetcher}. - * - * @param The type of records produced by this data source - */ -@Internal -@Deprecated -public abstract class FlinkKafkaConsumerBase extends RichParallelSourceFunction - implements CheckpointListener, ResultTypeQueryable, CheckpointedFunction { - - private static final long serialVersionUID = -6272159445203409112L; - - protected static final Logger LOG = LoggerFactory.getLogger(FlinkKafkaConsumerBase.class); - - /** The maximum number of pending non-committed checkpoints to track, to avoid memory leaks. */ - public static final int MAX_NUM_PENDING_CHECKPOINTS = 100; - - /** - * The default interval to execute partition discovery, in milliseconds ({@code Long.MIN_VALUE}, - * i.e. disabled by default). - */ - public static final long PARTITION_DISCOVERY_DISABLED = Long.MIN_VALUE; - - /** Boolean configuration key to disable metrics tracking. * */ - public static final String KEY_DISABLE_METRICS = "flink.disable-metrics"; - - /** Configuration key to define the consumer's partition discovery interval, in milliseconds. */ - public static final String KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS = - "flink.partition-discovery.interval-millis"; - - /** State name of the consumer's partition offset states. */ - private static final String OFFSETS_STATE_NAME = "topic-partition-offset-states"; - - // ------------------------------------------------------------------------ - // configuration state, set on the client relevant for all subtasks - // ------------------------------------------------------------------------ - - /** Describes whether we are discovering partitions for fixed topics or a topic pattern. */ - private final KafkaTopicsDescriptor topicsDescriptor; - - /** The schema to convert between Kafka's byte messages, and Flink's objects. */ - protected final KafkaDeserializationSchema deserializer; - - /** - * The set of topic partitions that the source will read, with their initial offsets to start - * reading from. - */ - private Map subscribedPartitionsToStartOffsets; - - /** - * Optional watermark strategy that will be run per Kafka partition, to exploit per-partition - * timestamp characteristics. The watermark strategy is kept in serialized form, to deserialize - * it into multiple copies. - */ - private SerializedValue> watermarkStrategy; - - /** - * User-set flag determining whether or not to commit on checkpoints. Note: this flag does not - * represent the final offset commit mode. - */ - private boolean enableCommitOnCheckpoints = true; - - /** User-set flag to disable filtering restored partitions with current topics descriptor. */ - private boolean filterRestoredPartitionsWithCurrentTopicsDescriptor = true; - - /** - * The offset commit mode for the consumer. The value of this can only be determined in {@link - * FlinkKafkaConsumerBase#open(Configuration)} since it depends on whether or not checkpointing - * is enabled for the job. - */ - private OffsetCommitMode offsetCommitMode; - - /** User configured value for discovery interval, in milliseconds. */ - private final long discoveryIntervalMillis; - - /** The startup mode for the consumer (default is {@link StartupMode#GROUP_OFFSETS}). */ - private StartupMode startupMode = StartupMode.GROUP_OFFSETS; - - /** - * Specific startup offsets; only relevant when startup mode is {@link - * StartupMode#SPECIFIC_OFFSETS}. 
- */ - private Map specificStartupOffsets; - - /** - * Timestamp to determine startup offsets; only relevant when startup mode is {@link - * StartupMode#TIMESTAMP}. - */ - private Long startupOffsetsTimestamp; - - // ------------------------------------------------------------------------ - // runtime state (used individually by each parallel subtask) - // ------------------------------------------------------------------------ - - /** Data for pending but uncommitted offsets. */ - private final LinkedMap pendingOffsetsToCommit = new LinkedMap(); - - /** The fetcher implements the connections to the Kafka brokers. */ - private transient volatile AbstractFetcher kafkaFetcher; - - /** The partition discoverer, used to find new partitions. */ - private transient volatile AbstractPartitionDiscoverer partitionDiscoverer; - - /** - * The offsets to restore to, if the consumer restores state from a checkpoint. - * - *

This map will be populated by the {@link #initializeState(FunctionInitializationContext)} - * method. - * - *

Using a sorted map as the ordering is important when using restored state to seed the - * partition discoverer. - */ - private transient volatile TreeMap restoredState; - - /** Accessor for state in the operator state backend. */ - private transient ListState> unionOffsetStates; - - /** Discovery loop, executed in a separate thread. */ - private transient volatile Thread discoveryLoopThread; - - /** Flag indicating whether the consumer is still running. */ - private volatile boolean running = true; - - // ------------------------------------------------------------------------ - // internal metrics - // ------------------------------------------------------------------------ - - /** - * Flag indicating whether or not metrics should be exposed. If {@code true}, offset metrics - * (e.g. current offset, committed offset) and Kafka-shipped metrics will be registered. - */ - private final boolean useMetrics; - - /** Counter for successful Kafka offset commits. */ - private transient Counter successfulCommits; - - /** Counter for failed Kafka offset commits. */ - private transient Counter failedCommits; - - /** - * Callback interface that will be invoked upon async Kafka commit completion. Please be aware - * that default callback implementation in base class does not provide any guarantees on - * thread-safety. This is sufficient for now because current supported Kafka connectors - * guarantee no more than 1 concurrent async pending offset commit. - */ - private transient KafkaCommitCallback offsetCommitCallback; - - // ------------------------------------------------------------------------ - - /** - * Base constructor. - * - * @param topics fixed list of topics to subscribe to (null, if using topic pattern) - * @param topicPattern the topic pattern to subscribe to (null, if using fixed topics) - * @param deserializer The deserializer to turn raw byte messages into Java/Scala objects. - * @param discoveryIntervalMillis the topic / partition discovery interval, in milliseconds (0 - * if discovery is disabled). - */ - public FlinkKafkaConsumerBase( - List topics, - Pattern topicPattern, - KafkaDeserializationSchema deserializer, - long discoveryIntervalMillis, - boolean useMetrics) { - this.topicsDescriptor = new KafkaTopicsDescriptor(topics, topicPattern); - this.deserializer = checkNotNull(deserializer, "valueDeserializer"); - - checkArgument( - discoveryIntervalMillis == PARTITION_DISCOVERY_DISABLED - || discoveryIntervalMillis >= 0, - "Cannot define a negative value for the topic / partition discovery interval."); - this.discoveryIntervalMillis = discoveryIntervalMillis; - - this.useMetrics = useMetrics; - } - - /** - * Make sure that auto commit is disabled when our offset commit mode is ON_CHECKPOINTS. This - * overwrites whatever setting the user configured in the properties. - * - * @param properties - Kafka configuration properties to be adjusted - * @param offsetCommitMode offset commit mode - */ - protected static void adjustAutoCommitConfig( - Properties properties, OffsetCommitMode offsetCommitMode) { - if (offsetCommitMode == OffsetCommitMode.ON_CHECKPOINTS - || offsetCommitMode == OffsetCommitMode.DISABLED) { - properties.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); - } - } - // ------------------------------------------------------------------------ - // Configuration - // ------------------------------------------------------------------------ - - /** - * Sets the given {@link WatermarkStrategy} on this consumer. 
These will be used to assign - * timestamps to records and generate watermarks to signal event time progress. - * - *

Running timestamp extractors / watermark generators directly inside the Kafka source - * (which you can do by using this method), per Kafka partition, allows users to let them - * exploit the per-partition characteristics. - * - *

When a subtask of a FlinkKafkaConsumer source reads multiple Kafka partitions, the streams - * from the partitions are unioned in a "first come first serve" fashion. Per-partition - * characteristics are usually lost that way. For example, if the timestamps are strictly - * ascending per Kafka partition, they will not be strictly ascending in the resulting Flink - * DataStream, if the parallel source subtask reads more than one partition. - * - *

Common watermark generation patterns can be found as static methods in the {@link - * org.apache.flink.api.common.eventtime.WatermarkStrategy} class. - * - * @return The consumer object, to allow function chaining. - */ - public FlinkKafkaConsumerBase assignTimestampsAndWatermarks( - WatermarkStrategy watermarkStrategy) { - checkNotNull(watermarkStrategy); - - try { - ClosureCleaner.clean( - watermarkStrategy, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true); - this.watermarkStrategy = new SerializedValue<>(watermarkStrategy); - } catch (Exception e) { - throw new IllegalArgumentException( - "The given WatermarkStrategy is not serializable", e); - } - - return this; - } - - /** - * Specifies an {@link AssignerWithPunctuatedWatermarks} to emit watermarks in a punctuated - * manner. The watermark extractor will run per Kafka partition, watermarks will be merged - * across partitions in the same way as in the Flink runtime, when streams are merged. - * - *

When a subtask of a FlinkKafkaConsumer source reads multiple Kafka partitions, the streams - * from the partitions are unioned in a "first come first serve" fashion. Per-partition - * characteristics are usually lost that way. For example, if the timestamps are strictly - * ascending per Kafka partition, they will not be strictly ascending in the resulting Flink - * DataStream, if the parallel source subtask reads more than one partition. - * - *

Running timestamp extractors / watermark generators directly inside the Kafka source, per - * Kafka partition, allows users to let them exploit the per-partition characteristics. - * - *

Note: One can use either an {@link AssignerWithPunctuatedWatermarks} or an {@link - * AssignerWithPeriodicWatermarks}, not both at the same time. - * - *

This method uses the deprecated watermark generator interfaces. Please switch to {@link - * #assignTimestampsAndWatermarks(WatermarkStrategy)} to use the new interfaces instead. The new - * interfaces support watermark idleness and no longer need to differentiate between "periodic" - * and "punctuated" watermarks. - * - * @deprecated Please use {@link #assignTimestampsAndWatermarks(WatermarkStrategy)} instead. - * @param assigner The timestamp assigner / watermark generator to use. - * @return The consumer object, to allow function chaining. - */ - @Deprecated - public FlinkKafkaConsumerBase assignTimestampsAndWatermarks( - AssignerWithPunctuatedWatermarks assigner) { - checkNotNull(assigner); - - if (this.watermarkStrategy != null) { - throw new IllegalStateException("Some watermark strategy has already been set."); - } - - try { - ClosureCleaner.clean(assigner, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true); - final WatermarkStrategy wms = - new AssignerWithPunctuatedWatermarksAdapter.Strategy<>(assigner); - - return assignTimestampsAndWatermarks(wms); - } catch (Exception e) { - throw new IllegalArgumentException("The given assigner is not serializable", e); - } - } - - /** - * Specifies an {@link AssignerWithPunctuatedWatermarks} to emit watermarks in a punctuated - * manner. The watermark extractor will run per Kafka partition, watermarks will be merged - * across partitions in the same way as in the Flink runtime, when streams are merged. - * - *

When a subtask of a FlinkKafkaConsumer source reads multiple Kafka partitions, the streams - * from the partitions are unioned in a "first come first serve" fashion. Per-partition - * characteristics are usually lost that way. For example, if the timestamps are strictly - * ascending per Kafka partition, they will not be strictly ascending in the resulting Flink - * DataStream, if the parallel source subtask reads more than one partition. - * - *

Running timestamp extractors / watermark generators directly inside the Kafka source, per - * Kafka partition, allows users to let them exploit the per-partition characteristics. - * - *

Note: One can use either an {@link AssignerWithPunctuatedWatermarks} or an {@link - * AssignerWithPeriodicWatermarks}, not both at the same time. - * - *

This method uses the deprecated watermark generator interfaces. Please switch to {@link - * #assignTimestampsAndWatermarks(WatermarkStrategy)} to use the new interfaces instead. The new - * interfaces support watermark idleness and no longer need to differentiate between "periodic" - * and "punctuated" watermarks. - * - * @deprecated Please use {@link #assignTimestampsAndWatermarks(WatermarkStrategy)} instead. - * @param assigner The timestamp assigner / watermark generator to use. - * @return The consumer object, to allow function chaining. - */ - @Deprecated - public FlinkKafkaConsumerBase assignTimestampsAndWatermarks( - AssignerWithPeriodicWatermarks assigner) { - checkNotNull(assigner); - - if (this.watermarkStrategy != null) { - throw new IllegalStateException("Some watermark strategy has already been set."); - } - - try { - ClosureCleaner.clean(assigner, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true); - final WatermarkStrategy wms = - new AssignerWithPeriodicWatermarksAdapter.Strategy<>(assigner); - - return assignTimestampsAndWatermarks(wms); - } catch (Exception e) { - throw new IllegalArgumentException("The given assigner is not serializable", e); - } - } - - /** - * Specifies whether or not the consumer should commit offsets back to Kafka on checkpoints. - * - *

This setting will only have effect if checkpointing is enabled for the job. If - * checkpointing isn't enabled, only the "auto.commit.enable" (for 0.8) / "enable.auto.commit" - * (for 0.9+) property settings will be used. - * - * @return The consumer object, to allow function chaining. - */ - public FlinkKafkaConsumerBase setCommitOffsetsOnCheckpoints(boolean commitOnCheckpoints) { - this.enableCommitOnCheckpoints = commitOnCheckpoints; - return this; - } - - /** - * Specifies the consumer to start reading from the earliest offset for all partitions. This - * lets the consumer ignore any committed group offsets in Zookeeper / Kafka brokers. - * - *

This method does not affect where partitions are read from when the consumer is restored - * from a checkpoint or savepoint. When the consumer is restored from a checkpoint or savepoint, - * only the offsets in the restored state will be used. - * - * @return The consumer object, to allow function chaining. - */ - public FlinkKafkaConsumerBase setStartFromEarliest() { - this.startupMode = StartupMode.EARLIEST; - this.startupOffsetsTimestamp = null; - this.specificStartupOffsets = null; - return this; - } - - /** - * Specifies the consumer to start reading from the latest offset for all partitions. This lets - * the consumer ignore any committed group offsets in Zookeeper / Kafka brokers. - * - *

This method does not affect where partitions are read from when the consumer is restored - * from a checkpoint or savepoint. When the consumer is restored from a checkpoint or savepoint, - * only the offsets in the restored state will be used. - * - * @return The consumer object, to allow function chaining. - */ - public FlinkKafkaConsumerBase setStartFromLatest() { - this.startupMode = StartupMode.LATEST; - this.startupOffsetsTimestamp = null; - this.specificStartupOffsets = null; - return this; - } - - /** - * Specifies the consumer to start reading partitions from a specified timestamp. The specified - * timestamp must be before the current timestamp. This lets the consumer ignore any committed - * group offsets in Zookeeper / Kafka brokers. - * - *

The consumer will look up the earliest offset whose timestamp is greater than or equal to - * the specific timestamp from Kafka. If there's no such offset, the consumer will use the - * latest offset to read data from kafka. - * - *

This method does not affect where partitions are read from when the consumer is restored - * from a checkpoint or savepoint. When the consumer is restored from a checkpoint or savepoint, - * only the offsets in the restored state will be used. - * - * @param startupOffsetsTimestamp timestamp for the startup offsets, as milliseconds from epoch. - * @return The consumer object, to allow function chaining. - */ - public FlinkKafkaConsumerBase setStartFromTimestamp(long startupOffsetsTimestamp) { - checkArgument( - startupOffsetsTimestamp >= 0, - "The provided value for the startup offsets timestamp is invalid."); - - long currentTimestamp = System.currentTimeMillis(); - checkArgument( - startupOffsetsTimestamp <= currentTimestamp, - "Startup time[%s] must be before current time[%s].", - startupOffsetsTimestamp, - currentTimestamp); - - this.startupMode = StartupMode.TIMESTAMP; - this.startupOffsetsTimestamp = startupOffsetsTimestamp; - this.specificStartupOffsets = null; - return this; - } - - /** - * Specifies the consumer to start reading from any committed group offsets found in Zookeeper / - * Kafka brokers. The "group.id" property must be set in the configuration properties. If no - * offset can be found for a partition, the behaviour in "auto.offset.reset" set in the - * configuration properties will be used for the partition. - * - *

This method does not affect where partitions are read from when the consumer is restored - * from a checkpoint or savepoint. When the consumer is restored from a checkpoint or savepoint, - * only the offsets in the restored state will be used. - * - * @return The consumer object, to allow function chaining. - */ - public FlinkKafkaConsumerBase setStartFromGroupOffsets() { - this.startupMode = StartupMode.GROUP_OFFSETS; - this.startupOffsetsTimestamp = null; - this.specificStartupOffsets = null; - return this; - } - - /** - * Specifies the consumer to start reading partitions from specific offsets, set independently - * for each partition. The specified offset should be the offset of the next record that will be - * read from partitions. This lets the consumer ignore any committed group offsets in Zookeeper - * / Kafka brokers. - * - *

If the provided map of offsets contains entries whose {@link KafkaTopicPartition} is not - * subscribed by the consumer, the entry will be ignored. If the consumer subscribes to a - * partition that does not exist in the provided map of offsets, the consumer will fallback to - * the default group offset behaviour (see {@link - * FlinkKafkaConsumerBase#setStartFromGroupOffsets()}) for that particular partition. - * - *

If the specified offset for a partition is invalid, or the behaviour for that partition is - * defaulted to group offsets but still no group offset could be found for it, then the - * "auto.offset.reset" behaviour set in the configuration properties will be used for the - * partition - * - *

This method does not affect where partitions are read from when the consumer is restored - * from a checkpoint or savepoint. When the consumer is restored from a checkpoint or savepoint, - * only the offsets in the restored state will be used. - * - * @return The consumer object, to allow function chaining. - */ - public FlinkKafkaConsumerBase setStartFromSpecificOffsets( - Map specificStartupOffsets) { - this.startupMode = StartupMode.SPECIFIC_OFFSETS; - this.startupOffsetsTimestamp = null; - this.specificStartupOffsets = checkNotNull(specificStartupOffsets); - return this; - } - - /** - * By default, when restoring from a checkpoint / savepoint, the consumer always ignores - * restored partitions that are no longer associated with the current specified topics or topic - * pattern to subscribe to. - * - *

This method configures the consumer to not filter the restored partitions, therefore - * always attempting to consume whatever partition was present in the previous execution - * regardless of the specified topics to subscribe to in the current execution. - * - * @return The consumer object, to allow function chaining. - */ - public FlinkKafkaConsumerBase disableFilterRestoredPartitionsWithSubscribedTopics() { - this.filterRestoredPartitionsWithCurrentTopicsDescriptor = false; - return this; - } - - // ------------------------------------------------------------------------ - // Work methods - // ------------------------------------------------------------------------ - - @Override - public void open(Configuration configuration) throws Exception { - // determine the offset commit mode - this.offsetCommitMode = - OffsetCommitModes.fromConfiguration( - getIsAutoCommitEnabled(), - enableCommitOnCheckpoints, - ((StreamingRuntimeContext) getRuntimeContext()).isCheckpointingEnabled()); - - // create the partition discoverer - this.partitionDiscoverer = - createPartitionDiscoverer( - topicsDescriptor, - getRuntimeContext().getIndexOfThisSubtask(), - getRuntimeContext().getNumberOfParallelSubtasks()); - this.partitionDiscoverer.open(); - - subscribedPartitionsToStartOffsets = new HashMap<>(); - final List allPartitions = partitionDiscoverer.discoverPartitions(); - if (restoredState != null) { - for (KafkaTopicPartition partition : allPartitions) { - if (!restoredState.containsKey(partition)) { - restoredState.put(partition, KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET); - } - } - - for (Map.Entry restoredStateEntry : - restoredState.entrySet()) { - // seed the partition discoverer with the union state while filtering out - // restored partitions that should not be subscribed by this subtask - if (KafkaTopicPartitionAssigner.assign( - restoredStateEntry.getKey(), - getRuntimeContext().getNumberOfParallelSubtasks()) - == getRuntimeContext().getIndexOfThisSubtask()) { - subscribedPartitionsToStartOffsets.put( - restoredStateEntry.getKey(), restoredStateEntry.getValue()); - } - } - - if (filterRestoredPartitionsWithCurrentTopicsDescriptor) { - subscribedPartitionsToStartOffsets - .entrySet() - .removeIf( - entry -> { - if (!topicsDescriptor.isMatchingTopic( - entry.getKey().getTopic())) { - LOG.warn( - "{} is removed from subscribed partitions since it is no longer associated with topics descriptor of current execution.", - entry.getKey()); - return true; - } - return false; - }); - } - - LOG.info( - "Consumer subtask {} will start reading {} partitions with offsets in restored state: {}", - getRuntimeContext().getIndexOfThisSubtask(), - subscribedPartitionsToStartOffsets.size(), - subscribedPartitionsToStartOffsets); - } else { - // use the partition discoverer to fetch the initial seed partitions, - // and set their initial offsets depending on the startup mode. - // for SPECIFIC_OFFSETS and TIMESTAMP modes, we set the specific offsets now; - // for other modes (EARLIEST, LATEST, and GROUP_OFFSETS), the offset is lazily - // determined - // when the partition is actually read. 
- switch (startupMode) { - case SPECIFIC_OFFSETS: - if (specificStartupOffsets == null) { - throw new IllegalStateException( - "Startup mode for the consumer set to " - + StartupMode.SPECIFIC_OFFSETS - + ", but no specific offsets were specified."); - } - - for (KafkaTopicPartition seedPartition : allPartitions) { - Long specificOffset = specificStartupOffsets.get(seedPartition); - if (specificOffset != null) { - // since the specified offsets represent the next record to read, we - // subtract - // it by one so that the initial state of the consumer will be correct - subscribedPartitionsToStartOffsets.put( - seedPartition, specificOffset - 1); - } else { - // default to group offset behaviour if the user-provided specific - // offsets - // do not contain a value for this partition - subscribedPartitionsToStartOffsets.put( - seedPartition, KafkaTopicPartitionStateSentinel.GROUP_OFFSET); - } - } - - break; - case TIMESTAMP: - if (startupOffsetsTimestamp == null) { - throw new IllegalStateException( - "Startup mode for the consumer set to " - + StartupMode.TIMESTAMP - + ", but no startup timestamp was specified."); - } - - for (Map.Entry partitionToOffset : - fetchOffsetsWithTimestamp(allPartitions, startupOffsetsTimestamp) - .entrySet()) { - subscribedPartitionsToStartOffsets.put( - partitionToOffset.getKey(), - (partitionToOffset.getValue() == null) - // if an offset cannot be retrieved for a partition with the - // given timestamp, - // we default to using the latest offset for the partition - ? KafkaTopicPartitionStateSentinel.LATEST_OFFSET - // since the specified offsets represent the next record to - // read, we subtract - // it by one so that the initial state of the consumer will - // be correct - : partitionToOffset.getValue() - 1); - } - - break; - default: - for (KafkaTopicPartition seedPartition : allPartitions) { - subscribedPartitionsToStartOffsets.put( - seedPartition, startupMode.getStateSentinel()); - } - } - - if (!subscribedPartitionsToStartOffsets.isEmpty()) { - switch (startupMode) { - case EARLIEST: - LOG.info( - "Consumer subtask {} will start reading the following {} partitions from the earliest offsets: {}", - getRuntimeContext().getIndexOfThisSubtask(), - subscribedPartitionsToStartOffsets.size(), - subscribedPartitionsToStartOffsets.keySet()); - break; - case LATEST: - LOG.info( - "Consumer subtask {} will start reading the following {} partitions from the latest offsets: {}", - getRuntimeContext().getIndexOfThisSubtask(), - subscribedPartitionsToStartOffsets.size(), - subscribedPartitionsToStartOffsets.keySet()); - break; - case TIMESTAMP: - LOG.info( - "Consumer subtask {} will start reading the following {} partitions from timestamp {}: {}", - getRuntimeContext().getIndexOfThisSubtask(), - subscribedPartitionsToStartOffsets.size(), - startupOffsetsTimestamp, - subscribedPartitionsToStartOffsets.keySet()); - break; - case SPECIFIC_OFFSETS: - LOG.info( - "Consumer subtask {} will start reading the following {} partitions from the specified startup offsets {}: {}", - getRuntimeContext().getIndexOfThisSubtask(), - subscribedPartitionsToStartOffsets.size(), - specificStartupOffsets, - subscribedPartitionsToStartOffsets.keySet()); - - List partitionsDefaultedToGroupOffsets = - new ArrayList<>(subscribedPartitionsToStartOffsets.size()); - for (Map.Entry subscribedPartition : - subscribedPartitionsToStartOffsets.entrySet()) { - if (subscribedPartition.getValue() - == KafkaTopicPartitionStateSentinel.GROUP_OFFSET) { - 
partitionsDefaultedToGroupOffsets.add(subscribedPartition.getKey()); - } - } - - if (partitionsDefaultedToGroupOffsets.size() > 0) { - LOG.warn( - "Consumer subtask {} cannot find offsets for the following {} partitions in the specified startup offsets: {}" - + "; their startup offsets will be defaulted to their committed group offsets in Kafka.", - getRuntimeContext().getIndexOfThisSubtask(), - partitionsDefaultedToGroupOffsets.size(), - partitionsDefaultedToGroupOffsets); - } - break; - case GROUP_OFFSETS: - LOG.info( - "Consumer subtask {} will start reading the following {} partitions from the committed group offsets in Kafka: {}", - getRuntimeContext().getIndexOfThisSubtask(), - subscribedPartitionsToStartOffsets.size(), - subscribedPartitionsToStartOffsets.keySet()); - } - } else { - LOG.info( - "Consumer subtask {} initially has no partitions to read from.", - getRuntimeContext().getIndexOfThisSubtask()); - } - } - - this.deserializer.open( - RuntimeContextInitializationContextAdapters.deserializationAdapter( - getRuntimeContext(), metricGroup -> metricGroup.addGroup("user"))); - } - - @Override - public void run(SourceContext sourceContext) throws Exception { - if (subscribedPartitionsToStartOffsets == null) { - throw new Exception("The partitions were not set for the consumer"); - } - - // initialize commit metrics and default offset callback method - this.successfulCommits = - this.getRuntimeContext() - .getMetricGroup() - .counter(COMMITS_SUCCEEDED_METRICS_COUNTER); - this.failedCommits = - this.getRuntimeContext().getMetricGroup().counter(COMMITS_FAILED_METRICS_COUNTER); - final int subtaskIndex = this.getRuntimeContext().getIndexOfThisSubtask(); - - this.offsetCommitCallback = - new KafkaCommitCallback() { - @Override - public void onSuccess() { - successfulCommits.inc(); - } - - @Override - public void onException(Throwable cause) { - LOG.warn( - String.format( - "Consumer subtask %d failed async Kafka commit.", - subtaskIndex), - cause); - failedCommits.inc(); - } - }; - - // mark the subtask as temporarily idle if there are no initial seed partitions; - // once this subtask discovers some partitions and starts collecting records, the subtask's - // status will automatically be triggered back to be active. - if (subscribedPartitionsToStartOffsets.isEmpty()) { - sourceContext.markAsTemporarilyIdle(); - } - - LOG.info( - "Consumer subtask {} creating fetcher with offsets {}.", - getRuntimeContext().getIndexOfThisSubtask(), - subscribedPartitionsToStartOffsets); - // from this point forward: - // - 'snapshotState' will draw offsets from the fetcher, - // instead of being built from `subscribedPartitionsToStartOffsets` - // - 'notifyCheckpointComplete' will start to do work (i.e. 
commit offsets to - // Kafka through the fetcher, if configured to do so) - this.kafkaFetcher = - createFetcher( - sourceContext, - subscribedPartitionsToStartOffsets, - watermarkStrategy, - (StreamingRuntimeContext) getRuntimeContext(), - offsetCommitMode, - getRuntimeContext().getMetricGroup().addGroup(KAFKA_CONSUMER_METRICS_GROUP), - useMetrics); - - if (!running) { - return; - } - - // depending on whether we were restored with the current state version (1.3), - // remaining logic branches off into 2 paths: - // 1) New state - partition discovery loop executed as separate thread, with this - // thread running the main fetcher loop - // 2) Old state - partition discovery is disabled and only the main fetcher loop is - // executed - if (discoveryIntervalMillis == PARTITION_DISCOVERY_DISABLED) { - kafkaFetcher.runFetchLoop(); - } else { - runWithPartitionDiscovery(); - } - } - - private void runWithPartitionDiscovery() throws Exception { - final AtomicReference discoveryLoopErrorRef = new AtomicReference<>(); - createAndStartDiscoveryLoop(discoveryLoopErrorRef); - - kafkaFetcher.runFetchLoop(); - - // make sure that the partition discoverer is waked up so that - // the discoveryLoopThread exits - partitionDiscoverer.wakeup(); - joinDiscoveryLoopThread(); - - // rethrow any fetcher errors - final Exception discoveryLoopError = discoveryLoopErrorRef.get(); - if (discoveryLoopError != null) { - throw new RuntimeException(discoveryLoopError); - } - } - - @VisibleForTesting - void joinDiscoveryLoopThread() throws InterruptedException { - if (discoveryLoopThread != null) { - discoveryLoopThread.join(); - } - } - - private void createAndStartDiscoveryLoop(AtomicReference discoveryLoopErrorRef) { - discoveryLoopThread = - new Thread( - () -> { - try { - // --------------------- partition discovery loop - // --------------------- - - // throughout the loop, we always eagerly check if we are still - // running before - // performing the next operation, so that we can escape the loop as - // soon as possible - - while (running) { - if (LOG.isDebugEnabled()) { - LOG.debug( - "Consumer subtask {} is trying to discover new partitions ...", - getRuntimeContext().getIndexOfThisSubtask()); - } - - final List discoveredPartitions; - try { - discoveredPartitions = - partitionDiscoverer.discoverPartitions(); - } catch (AbstractPartitionDiscoverer.WakeupException - | AbstractPartitionDiscoverer.ClosedException e) { - // the partition discoverer may have been closed or woken up - // before or during the discovery; - // this would only happen if the consumer was canceled; - // simply escape the loop - break; - } - - // no need to add the discovered partitions if we were closed - // during the meantime - if (running && !discoveredPartitions.isEmpty()) { - kafkaFetcher.addDiscoveredPartitions(discoveredPartitions); - } - - // do not waste any time sleeping if we're not running anymore - if (running && discoveryIntervalMillis != 0) { - try { - Thread.sleep(discoveryIntervalMillis); - } catch (InterruptedException iex) { - // may be interrupted if the consumer was canceled - // midway; simply escape the loop - break; - } - } - } - } catch (Exception e) { - discoveryLoopErrorRef.set(e); - } finally { - // calling cancel will also let the fetcher loop escape - // (if not running, cancel() was already called) - if (running) { - cancel(); - } - } - }, - "Kafka Partition Discovery for " - + getRuntimeContext().getTaskNameWithSubtasks()); - - discoveryLoopThread.start(); - } - - @Override - public void cancel() { - // 
set ourselves as not running; - // this would let the main discovery loop escape as soon as possible - running = false; - - if (discoveryLoopThread != null) { - - if (partitionDiscoverer != null) { - // we cannot close the discoverer here, as it is error-prone to concurrent access; - // only wakeup the discoverer, the discovery loop will clean itself up after it - // escapes - partitionDiscoverer.wakeup(); - } - - // the discovery loop may currently be sleeping in-between - // consecutive discoveries; interrupt to shutdown faster - discoveryLoopThread.interrupt(); - } - - // abort the fetcher, if there is one - if (kafkaFetcher != null) { - kafkaFetcher.cancel(); - } - } - - @Override - public void close() throws Exception { - cancel(); - - joinDiscoveryLoopThread(); - - Exception exception = null; - if (partitionDiscoverer != null) { - try { - partitionDiscoverer.close(); - } catch (Exception e) { - exception = e; - } - } - - try { - super.close(); - } catch (Exception e) { - exception = ExceptionUtils.firstOrSuppressed(e, exception); - } - - if (exception != null) { - throw exception; - } - } - - // ------------------------------------------------------------------------ - // Checkpoint and restore - // ------------------------------------------------------------------------ - - @Override - public final void initializeState(FunctionInitializationContext context) throws Exception { - - OperatorStateStore stateStore = context.getOperatorStateStore(); - - this.unionOffsetStates = - stateStore.getUnionListState( - new ListStateDescriptor<>( - OFFSETS_STATE_NAME, - createStateSerializer(getRuntimeContext().getExecutionConfig()))); - - if (context.isRestored()) { - restoredState = new TreeMap<>(new KafkaTopicPartition.Comparator()); - - // populate actual holder for restored state - for (Tuple2 kafkaOffset : unionOffsetStates.get()) { - restoredState.put(kafkaOffset.f0, kafkaOffset.f1); - } - - LOG.info( - "Consumer subtask {} restored state: {}.", - getRuntimeContext().getIndexOfThisSubtask(), - restoredState); - } else { - LOG.info( - "Consumer subtask {} has no restore state.", - getRuntimeContext().getIndexOfThisSubtask()); - } - } - - @Override - public final void snapshotState(FunctionSnapshotContext context) throws Exception { - if (!running) { - LOG.debug("snapshotState() called on closed source"); - } else { - unionOffsetStates.clear(); - - final AbstractFetcher fetcher = this.kafkaFetcher; - if (fetcher == null) { - // the fetcher has not yet been initialized, which means we need to return the - // originally restored offsets or the assigned partitions - for (Map.Entry subscribedPartition : - subscribedPartitionsToStartOffsets.entrySet()) { - unionOffsetStates.add( - Tuple2.of( - subscribedPartition.getKey(), subscribedPartition.getValue())); - } - - if (offsetCommitMode == OffsetCommitMode.ON_CHECKPOINTS) { - // the map cannot be asynchronously updated, because only one checkpoint call - // can happen - // on this function at a time: either snapshotState() or - // notifyCheckpointComplete() - pendingOffsetsToCommit.put(context.getCheckpointId(), restoredState); - } - } else { - HashMap currentOffsets = fetcher.snapshotCurrentState(); - - if (offsetCommitMode == OffsetCommitMode.ON_CHECKPOINTS) { - // the map cannot be asynchronously updated, because only one checkpoint call - // can happen - // on this function at a time: either snapshotState() or - // notifyCheckpointComplete() - pendingOffsetsToCommit.put(context.getCheckpointId(), currentOffsets); - } - - for (Map.Entry 
kafkaTopicPartitionLongEntry : - currentOffsets.entrySet()) { - unionOffsetStates.add( - Tuple2.of( - kafkaTopicPartitionLongEntry.getKey(), - kafkaTopicPartitionLongEntry.getValue())); - } - } - - if (offsetCommitMode == OffsetCommitMode.ON_CHECKPOINTS) { - // truncate the map of pending offsets to commit, to prevent infinite growth - while (pendingOffsetsToCommit.size() > MAX_NUM_PENDING_CHECKPOINTS) { - pendingOffsetsToCommit.remove(0); - } - } - } - } - - @Override - public final void notifyCheckpointComplete(long checkpointId) throws Exception { - if (!running) { - LOG.debug("notifyCheckpointComplete() called on closed source"); - return; - } - - final AbstractFetcher fetcher = this.kafkaFetcher; - if (fetcher == null) { - LOG.debug("notifyCheckpointComplete() called on uninitialized source"); - return; - } - - if (offsetCommitMode == OffsetCommitMode.ON_CHECKPOINTS) { - // only one commit operation must be in progress - if (LOG.isDebugEnabled()) { - LOG.debug( - "Consumer subtask {} committing offsets to Kafka/ZooKeeper for checkpoint {}.", - getRuntimeContext().getIndexOfThisSubtask(), - checkpointId); - } - - try { - final int posInMap = pendingOffsetsToCommit.indexOf(checkpointId); - if (posInMap == -1) { - LOG.warn( - "Consumer subtask {} received confirmation for unknown checkpoint id {}", - getRuntimeContext().getIndexOfThisSubtask(), - checkpointId); - return; - } - - @SuppressWarnings("unchecked") - Map offsets = - (Map) pendingOffsetsToCommit.remove(posInMap); - - // remove older checkpoints in map - for (int i = 0; i < posInMap; i++) { - pendingOffsetsToCommit.remove(0); - } - - if (offsets == null || offsets.size() == 0) { - LOG.debug( - "Consumer subtask {} has empty checkpoint state.", - getRuntimeContext().getIndexOfThisSubtask()); - return; - } - - fetcher.commitInternalOffsetsToKafka(offsets, offsetCommitCallback); - } catch (Exception e) { - if (running) { - throw e; - } - // else ignore exception if we are no longer running - } - } - } - - @Override - public void notifyCheckpointAborted(long checkpointId) {} - - // ------------------------------------------------------------------------ - // Kafka Consumer specific methods - // ------------------------------------------------------------------------ - - /** - * Creates the fetcher that connect to the Kafka brokers, pulls data, deserialized the data, and - * emits it into the data streams. - * - * @param sourceContext The source context to emit data to. - * @param subscribedPartitionsToStartOffsets The set of partitions that this subtask should - * handle, with their start offsets. - * @param watermarkStrategy Optional, a serialized WatermarkStrategy. - * @param runtimeContext The task's runtime context. - * @return The instantiated fetcher - * @throws Exception The method should forward exceptions - */ - protected abstract AbstractFetcher createFetcher( - SourceContext sourceContext, - Map subscribedPartitionsToStartOffsets, - SerializedValue> watermarkStrategy, - StreamingRuntimeContext runtimeContext, - OffsetCommitMode offsetCommitMode, - MetricGroup kafkaMetricGroup, - boolean useMetrics) - throws Exception; - - /** - * Creates the partition discoverer that is used to find new partitions for this subtask. - * - * @param topicsDescriptor Descriptor that describes whether we are discovering partitions for - * fixed topics or a topic pattern. - * @param indexOfThisSubtask The index of this consumer subtask. - * @param numParallelSubtasks The total number of parallel consumer subtasks. 
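
The removal above covers the legacy consumer internals: fetcher creation, the partition-discovery loop, and offsets committed on checkpoints. For orientation, a minimal sketch of the roughly equivalent setup on the newer KafkaSource API; bootstrap servers, topic, group id and the discovery interval below are placeholder values.

    import org.apache.flink.api.common.eventtime.WatermarkStrategy;
    import org.apache.flink.api.common.serialization.SimpleStringSchema;
    import org.apache.flink.connector.kafka.source.KafkaSource;
    import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
    import org.apache.flink.streaming.api.datastream.DataStream;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.kafka.clients.consumer.OffsetResetStrategy;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    KafkaSource<String> source =
            KafkaSource.<String>builder()
                    .setBootstrapServers("localhost:9092")
                    .setTopics("input-topic")
                    .setGroupId("my-consumer-group")
                    // counterpart of the GROUP_OFFSETS startup handling in the deleted code
                    .setStartingOffsets(
                            OffsetsInitializer.committedOffsets(OffsetResetStrategy.EARLIEST))
                    // counterpart of the discoveryIntervalMillis-driven discovery loop
                    .setProperty("partition.discovery.interval.ms", "60000")
                    .setValueOnlyDeserializer(new SimpleStringSchema())
                    .build();

    DataStream<String> stream =
            env.fromSource(source, WatermarkStrategy.noWatermarks(), "Kafka Source");
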
- * @return The instantiated partition discoverer - */ - protected abstract AbstractPartitionDiscoverer createPartitionDiscoverer( - KafkaTopicsDescriptor topicsDescriptor, - int indexOfThisSubtask, - int numParallelSubtasks); - - protected abstract boolean getIsAutoCommitEnabled(); - - protected abstract Map fetchOffsetsWithTimestamp( - Collection partitions, long timestamp); - - // ------------------------------------------------------------------------ - // ResultTypeQueryable methods - // ------------------------------------------------------------------------ - - @Override - public TypeInformation getProducedType() { - return deserializer.getProducedType(); - } - - // ------------------------------------------------------------------------ - // Test utilities - // ------------------------------------------------------------------------ - - @VisibleForTesting - Map getSubscribedPartitionsToStartOffsets() { - return subscribedPartitionsToStartOffsets; - } - - @VisibleForTesting - TreeMap getRestoredState() { - return restoredState; - } - - @VisibleForTesting - OffsetCommitMode getOffsetCommitMode() { - return offsetCommitMode; - } - - @VisibleForTesting - LinkedMap getPendingOffsetsToCommit() { - return pendingOffsetsToCommit; - } - - @VisibleForTesting - public boolean getEnableCommitOnCheckpoints() { - return enableCommitOnCheckpoints; - } - - /** - * Creates state serializer for kafka topic partition to offset tuple. Using of the explicit - * state serializer with KryoSerializer is needed because otherwise users cannot use - * 'disableGenericTypes' properties with KafkaConsumer. - */ - @VisibleForTesting - static TupleSerializer> createStateSerializer( - ExecutionConfig executionConfig) { - // explicit serializer will keep the compatibility with GenericTypeInformation and allow to - // disableGenericTypes for users - TypeSerializer[] fieldSerializers = - new TypeSerializer[] { - new KryoSerializer<>(KafkaTopicPartition.class, executionConfig), - LongSerializer.INSTANCE - }; - @SuppressWarnings("unchecked") - Class> tupleClass = - (Class>) (Class) Tuple2.class; - return new TupleSerializer<>(tupleClass, fieldSerializers); - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaErrorCode.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaErrorCode.java deleted file mode 100644 index 3c1ae27e3..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaErrorCode.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.annotation.PublicEvolving; - -/** - * Error codes used in {@link FlinkKafkaException}. - * - * @deprecated Will be removed with {@link FlinkKafkaProducer} and {@link - * org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle}. - */ -@PublicEvolving -@Deprecated -public enum FlinkKafkaErrorCode { - PRODUCERS_POOL_EMPTY, - EXTERNAL_ERROR -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaException.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaException.java deleted file mode 100644 index 65b654c64..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaException.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.annotation.PublicEvolving; -import org.apache.flink.util.FlinkException; - -/** - * Exception used by {@link FlinkKafkaProducer} and {@link FlinkKafkaConsumer}. - * - * @deprecated Will be removed with {@link FlinkKafkaProducer} and {@link - * org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle}. - */ -@PublicEvolving -@Deprecated -public class FlinkKafkaException extends FlinkException { - - private static final long serialVersionUID = 920269130311214200L; - - private final FlinkKafkaErrorCode errorCode; - - public FlinkKafkaException(FlinkKafkaErrorCode errorCode, String message) { - super(message); - this.errorCode = errorCode; - } - - public FlinkKafkaException(FlinkKafkaErrorCode errorCode, String message, Throwable cause) { - super(message, cause); - this.errorCode = errorCode; - } - - public FlinkKafkaErrorCode getErrorCode() { - return errorCode; - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducer.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducer.java deleted file mode 100644 index 4b39749d9..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducer.java +++ /dev/null @@ -1,1920 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.annotation.PublicEvolving; -import org.apache.flink.annotation.VisibleForTesting; -import org.apache.flink.api.common.ExecutionConfig; -import org.apache.flink.api.common.functions.RuntimeContext; -import org.apache.flink.api.common.serialization.RuntimeContextInitializationContextAdapters; -import org.apache.flink.api.common.serialization.SerializationSchema; -import org.apache.flink.api.common.state.ListState; -import org.apache.flink.api.common.state.ListStateDescriptor; -import org.apache.flink.api.common.time.Time; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot; -import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot; -import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton; -import org.apache.flink.api.java.ClosureCleaner; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.connector.kafka.sink.KafkaSink; -import org.apache.flink.core.memory.DataInputView; -import org.apache.flink.core.memory.DataOutputView; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.runtime.state.FunctionInitializationContext; -import org.apache.flink.runtime.state.FunctionSnapshotContext; -import org.apache.flink.streaming.api.functions.sink.TwoPhaseCommitSinkFunction; -import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; -import org.apache.flink.streaming.connectors.kafka.internals.FlinkKafkaInternalProducer; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaSerializationSchemaWrapper; -import org.apache.flink.streaming.connectors.kafka.internals.TransactionalIdsGenerator; -import org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricMutableWrapper; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkFixedPartitioner; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; -import org.apache.flink.streaming.util.serialization.KeyedSerializationSchema; -import org.apache.flink.util.ExceptionUtils; -import org.apache.flink.util.NetUtils; -import org.apache.flink.util.TemporaryClassLoaderContext; - -import org.apache.commons.lang3.StringUtils; -import org.apache.kafka.clients.producer.Callback; -import org.apache.kafka.clients.producer.Producer; -import org.apache.kafka.clients.producer.ProducerConfig; -import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.kafka.clients.producer.RecordMetadata; -import org.apache.kafka.common.Metric; -import org.apache.kafka.common.MetricName; -import org.apache.kafka.common.PartitionInfo; -import org.apache.kafka.common.errors.InvalidTxnStateException; -import org.apache.kafka.common.errors.ProducerFencedException; -import org.apache.kafka.common.serialization.ByteArraySerializer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.annotation.Nullable; - 
-import java.io.IOException; -import java.io.ObjectInputStream; -import java.time.Duration; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Properties; -import java.util.Set; -import java.util.concurrent.BlockingDeque; -import java.util.concurrent.LinkedBlockingDeque; -import java.util.concurrent.atomic.AtomicLong; - -import static org.apache.flink.util.Preconditions.checkNotNull; -import static org.apache.flink.util.Preconditions.checkState; - -/** - * Flink Sink to produce data into a Kafka topic. By default producer will use {@link - * Semantic#AT_LEAST_ONCE} semantic. Before using {@link Semantic#EXACTLY_ONCE} please refer to - * Flink's Kafka connector documentation. - * - * @deprecated Please use {@link KafkaSink}. - */ -@Deprecated -@PublicEvolving -public class FlinkKafkaProducer - extends TwoPhaseCommitSinkFunction< - IN, - FlinkKafkaProducer.KafkaTransactionState, - FlinkKafkaProducer.KafkaTransactionContext> { - - /** - * Semantics that can be chosen. - *

  • {@link #EXACTLY_ONCE} - *
  • {@link #AT_LEAST_ONCE} - *
  • {@link #NONE} - */ - public enum Semantic { - - /** - * Semantic.EXACTLY_ONCE the Flink producer will write all messages in a Kafka transaction - * that will be committed to Kafka on a checkpoint. - * - *

    In this mode {@link FlinkKafkaProducer} sets up a pool of {@link - * FlinkKafkaInternalProducer}. Between each checkpoint a Kafka transaction is created, - * which is committed on {@link FlinkKafkaProducer#notifyCheckpointComplete(long)}. If - * checkpoint complete notifications are running late, {@link FlinkKafkaProducer} can run - * out of {@link FlinkKafkaInternalProducer}s in the pool. In that case any subsequent - * {@link FlinkKafkaProducer#snapshotState(FunctionSnapshotContext)} requests will fail and - * {@link FlinkKafkaProducer} will keep using the {@link FlinkKafkaInternalProducer} from - * the previous checkpoint. To decrease the chance of failing checkpoints there are four - * options: - *

  • decrease number of max concurrent checkpoints - *
  • make checkpoints more reliable (so that they complete faster) - *
  • increase the delay between checkpoints - *
  • increase the size of {@link FlinkKafkaInternalProducer}s pool - */ - EXACTLY_ONCE, - - /** - * Semantic.AT_LEAST_ONCE the Flink producer will wait for all outstanding messages in the - * Kafka buffers to be acknowledged by the Kafka producer on a checkpoint. - */ - AT_LEAST_ONCE, - - /** - * Semantic.NONE means that nothing will be guaranteed. Messages can be lost and/or - * duplicated in case of failure. - */ - NONE - } - - private static final Logger LOG = LoggerFactory.getLogger(FlinkKafkaProducer.class); - - private static final long serialVersionUID = 1L; - - /** - * Number of characters to truncate the taskName to for the Kafka transactionalId. The maximum - * this can possibly be set to is 32,767 - (length of operatorUniqueId). - */ - private static final short maxTaskNameSize = 1_000; - - /** - * This coefficient determines what is the safe scale down factor. - * - *
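
The three Semantic values removed above correspond roughly to DeliveryGuarantee in the replacement KafkaSink (EXACTLY_ONCE, AT_LEAST_ONCE, NONE). A sketch of the equivalent sink configuration; broker address, topic and transactional-id prefix are placeholders.

    import org.apache.flink.api.common.serialization.SimpleStringSchema;
    import org.apache.flink.connector.base.DeliveryGuarantee;
    import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
    import org.apache.flink.connector.kafka.sink.KafkaSink;

    KafkaSink<String> sink =
            KafkaSink.<String>builder()
                    .setBootstrapServers("localhost:9092")
                    .setRecordSerializer(
                            KafkaRecordSerializationSchema.builder()
                                    .setTopic("output-topic")
                                    .setValueSerializationSchema(new SimpleStringSchema())
                                    .build())
                    // Semantic.EXACTLY_ONCE -> DeliveryGuarantee.EXACTLY_ONCE, and so on
                    .setDeliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE)
                    // required for EXACTLY_ONCE, similar in spirit to the transactional.id
                    // prefix of the producer removed in this file
                    .setTransactionalIdPrefix("my-application")
                    .build();
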

    If the Flink application previously failed before first checkpoint completed or we are - * starting new batch of {@link FlinkKafkaProducer} from scratch without clean shutdown of the - * previous one, {@link FlinkKafkaProducer} doesn't know what was the set of previously used - * Kafka's transactionalId's. In that case, it will try to play safe and abort all of the - * possible transactionalIds from the range of: {@code [0, getNumberOfParallelSubtasks() * - * kafkaProducersPoolSize * SAFE_SCALE_DOWN_FACTOR) } - * - *

    The range of available to use transactional ids is: {@code [0, - * getNumberOfParallelSubtasks() * kafkaProducersPoolSize) } - * - *

    This means that if we decrease {@code getNumberOfParallelSubtasks()} by a factor larger - * than {@code SAFE_SCALE_DOWN_FACTOR} we can have a left some lingering transaction. - */ - public static final int SAFE_SCALE_DOWN_FACTOR = 5; - - /** Default number of KafkaProducers in the pool. See {@link Semantic#EXACTLY_ONCE}. */ - public static final int DEFAULT_KAFKA_PRODUCERS_POOL_SIZE = 5; - - /** Default value for kafka transaction timeout. */ - public static final Time DEFAULT_KAFKA_TRANSACTION_TIMEOUT = Time.hours(1); - - /** Configuration key for disabling the metrics reporting. */ - public static final String KEY_DISABLE_METRICS = "flink.disable-metrics"; - - /** - * Descriptor of the transactional IDs list. Note: This state is serialized by Kryo Serializer - * and it has compatibility problem that will be removed later. Please use - * NEXT_TRANSACTIONAL_ID_HINT_DESCRIPTOR_V2. - */ - @Deprecated - private static final ListStateDescriptor - NEXT_TRANSACTIONAL_ID_HINT_DESCRIPTOR = - new ListStateDescriptor<>( - "next-transactional-id-hint", - TypeInformation.of(NextTransactionalIdHint.class)); - - private static final ListStateDescriptor - NEXT_TRANSACTIONAL_ID_HINT_DESCRIPTOR_V2 = - new ListStateDescriptor<>( - "next-transactional-id-hint-v2", - new NextTransactionalIdHintSerializer()); - - /** State for nextTransactionalIdHint. */ - private transient ListState nextTransactionalIdHintState; - - /** Generator for Transactional IDs. */ - private transient TransactionalIdsGenerator transactionalIdsGenerator; - - /** Hint for picking next transactional id. */ - private transient NextTransactionalIdHint nextTransactionalIdHint; - - /** User defined properties for the Producer. */ - protected final Properties producerConfig; - - /** The name of the default topic this producer is writing data to. */ - protected final String defaultTopicId; - - /** - * (Serializable) SerializationSchema for turning objects used with Flink into. byte[] for - * Kafka. - */ - @Nullable private final KeyedSerializationSchema keyedSchema; - - /** - * (Serializable) serialization schema for serializing records to {@link ProducerRecord - * ProducerRecords}. - */ - @Nullable private final KafkaSerializationSchema kafkaSchema; - - /** User-provided partitioner for assigning an object to a Kafka partition for each topic. */ - @Nullable private final FlinkKafkaPartitioner flinkKafkaPartitioner; - - /** Partitions of each topic. */ - protected final Map topicPartitionsMap; - - /** - * Max number of producers in the pool. If all producers are in use, snapshoting state will - * throw an exception. - */ - private final int kafkaProducersPoolSize; - - /** Pool of available transactional ids. */ - private final BlockingDeque availableTransactionalIds = new LinkedBlockingDeque<>(); - - /** Flag controlling whether we are writing the Flink record's timestamp into Kafka. */ - protected boolean writeTimestampToKafka = false; - - /** The transactional.id prefix to be used by the producers when communicating with Kafka. */ - @Nullable private String transactionalIdPrefix = null; - - /** Flag indicating whether to accept failures (and log them), or to fail on failures. */ - private boolean logFailuresOnly; - - /** Semantic chosen for this instance. */ - protected Semantic semantic; - - // -------------------------------- Runtime fields ------------------------------------------ - - /** The callback than handles error propagation or logging callbacks. 
*/ - @Nullable protected transient Callback callback; - - /** Errors encountered in the async producer are stored here. */ - @Nullable protected transient volatile Exception asyncException; - - /** Number of unacknowledged records. */ - protected final AtomicLong pendingRecords = new AtomicLong(); - - /** - * Cache of metrics to replace already registered metrics instead of overwriting existing ones. - */ - private final Map previouslyCreatedMetrics = new HashMap<>(); - - /** - * Creates a FlinkKafkaProducer for a given topic. The sink produces a DataStream to the topic. - * - * @param brokerList Comma separated addresses of the brokers - * @param topicId ID of the Kafka topic. - * @param serializationSchema User defined (keyless) serialization schema. - */ - public FlinkKafkaProducer( - String brokerList, String topicId, SerializationSchema serializationSchema) { - this(topicId, serializationSchema, getPropertiesFromBrokerList(brokerList)); - } - - /** - * Creates a FlinkKafkaProducer for a given topic. The sink produces a DataStream to the topic. - * - *
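
To make the transactional-id ranges described above concrete, a small worked example with hypothetical values (4 subtasks, the default pool size of 5, SAFE_SCALE_DOWN_FACTOR of 5):

    int parallelism = 4;                                                  // getNumberOfParallelSubtasks()
    int poolSize = FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE;  // 5
    int scaleDownFactor = FlinkKafkaProducer.SAFE_SCALE_DOWN_FACTOR;      // 5

    // ids a running job actually uses:                    [0, 4 * 5)     -> [0, 20)
    int idsInUse = parallelism * poolSize;
    // ids aborted after a start without a clean shutdown
    // or a completed first checkpoint:                    [0, 4 * 5 * 5) -> [0, 100)
    int idsAbortedOnFreshStart = parallelism * poolSize * scaleDownFactor;
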

    Using this constructor, the default {@link FlinkFixedPartitioner} will be used as the - * partitioner. This default partitioner maps each sink subtask to a single Kafka partition - * (i.e. all records received by a sink subtask will end up in the same Kafka partition). - * - *

    To use a custom partitioner, please use {@link #FlinkKafkaProducer(String, - * SerializationSchema, Properties, Optional)} instead. - * - * @param topicId ID of the Kafka topic. - * @param serializationSchema User defined key-less serialization schema. - * @param producerConfig Properties with the producer configuration. - */ - public FlinkKafkaProducer( - String topicId, - SerializationSchema serializationSchema, - Properties producerConfig) { - this( - topicId, - serializationSchema, - producerConfig, - Optional.of(new FlinkFixedPartitioner<>())); - } - - /** - * Creates a FlinkKafkaProducer for a given topic. The sink produces its input to the topic. It - * accepts a key-less {@link SerializationSchema} and possibly a custom {@link - * FlinkKafkaPartitioner}. - * - *

    Since a key-less {@link SerializationSchema} is used, all records sent to Kafka will not - * have an attached key. Therefore, if a partitioner is also not provided, records will be - * distributed to Kafka partitions in a round-robin fashion. - * - * @param topicId The topic to write data to - * @param serializationSchema A key-less serializable serialization schema for turning user - * objects into a kafka-consumable byte[] - * @param producerConfig Configuration properties for the KafkaProducer. 'bootstrap.servers.' is - * the only required argument. - * @param customPartitioner A serializable partitioner for assigning messages to Kafka - * partitions. If a partitioner is not provided, records will be distributed to Kafka - * partitions in a round-robin fashion. - */ - public FlinkKafkaProducer( - String topicId, - SerializationSchema serializationSchema, - Properties producerConfig, - Optional> customPartitioner) { - this( - topicId, - serializationSchema, - producerConfig, - customPartitioner.orElse(null), - Semantic.AT_LEAST_ONCE, - DEFAULT_KAFKA_PRODUCERS_POOL_SIZE); - } - - /** - * Creates a FlinkKafkaProducer for a given topic. The sink produces its input to the topic. It - * accepts a key-less {@link SerializationSchema} and possibly a custom {@link - * FlinkKafkaPartitioner}. - * - *

    Since a key-less {@link SerializationSchema} is used, all records sent to Kafka will not - * have an attached key. Therefore, if a partitioner is also not provided, records will be - * distributed to Kafka partitions in a round-robin fashion. - * - * @param topicId The topic to write data to - * @param serializationSchema A key-less serializable serialization schema for turning user - * objects into a kafka-consumable byte[] - * @param producerConfig Configuration properties for the KafkaProducer. 'bootstrap.servers.' is - * the only required argument. - * @param customPartitioner A serializable partitioner for assigning messages to Kafka - * partitions. If a partitioner is not provided, records will be distributed to Kafka - * partitions in a round-robin fashion. - * @param semantic Defines semantic that will be used by this producer (see {@link Semantic}). - * @param kafkaProducersPoolSize Overwrite default KafkaProducers pool size (see {@link - * Semantic#EXACTLY_ONCE}). - */ - public FlinkKafkaProducer( - String topicId, - SerializationSchema serializationSchema, - Properties producerConfig, - @Nullable FlinkKafkaPartitioner customPartitioner, - Semantic semantic, - int kafkaProducersPoolSize) { - this( - topicId, - null, - null, - new KafkaSerializationSchemaWrapper<>( - topicId, customPartitioner, false, serializationSchema), - producerConfig, - semantic, - kafkaProducersPoolSize); - } - - // ------------------- Key/Value serialization schema constructors ---------------------- - - /** - * Creates a FlinkKafkaProducer for a given topic. The sink produces a DataStream to the topic. - * - *

    Using this constructor, the default {@link FlinkFixedPartitioner} will be used as the - * partitioner. This default partitioner maps each sink subtask to a single Kafka partition - * (i.e. all records received by a sink subtask will end up in the same Kafka partition). - * - *

    To use a custom partitioner, please use {@link #FlinkKafkaProducer(String, - * KeyedSerializationSchema, Properties, Optional)} instead. - * - * @param brokerList Comma separated addresses of the brokers - * @param topicId ID of the Kafka topic. - * @param serializationSchema User defined serialization schema supporting key/value messages - * @deprecated use {@link #FlinkKafkaProducer(String, KafkaSerializationSchema, Properties, - * Semantic)} - */ - @Deprecated - public FlinkKafkaProducer( - String brokerList, String topicId, KeyedSerializationSchema serializationSchema) { - this( - topicId, - serializationSchema, - getPropertiesFromBrokerList(brokerList), - Optional.of(new FlinkFixedPartitioner())); - } - - /** - * Creates a FlinkKafkaProducer for a given topic. The sink produces a DataStream to the topic. - * - *

    Using this constructor, the default {@link FlinkFixedPartitioner} will be used as the - * partitioner. This default partitioner maps each sink subtask to a single Kafka partition - * (i.e. all records received by a sink subtask will end up in the same Kafka partition). - * - *

    To use a custom partitioner, please use {@link #FlinkKafkaProducer(String, - * KeyedSerializationSchema, Properties, Optional)} instead. - * - * @param topicId ID of the Kafka topic. - * @param serializationSchema User defined serialization schema supporting key/value messages - * @param producerConfig Properties with the producer configuration. - * @deprecated use {@link #FlinkKafkaProducer(String, KafkaSerializationSchema, Properties, - * Semantic)} - */ - @Deprecated - public FlinkKafkaProducer( - String topicId, - KeyedSerializationSchema serializationSchema, - Properties producerConfig) { - this( - topicId, - serializationSchema, - producerConfig, - Optional.of(new FlinkFixedPartitioner())); - } - - /** - * Creates a FlinkKafkaProducer for a given topic. The sink produces a DataStream to the topic. - * - *

    Using this constructor, the default {@link FlinkFixedPartitioner} will be used as the - * partitioner. This default partitioner maps each sink subtask to a single Kafka partition - * (i.e. all records received by a sink subtask will end up in the same Kafka partition). - * - * @param topicId ID of the Kafka topic. - * @param serializationSchema User defined serialization schema supporting key/value messages - * @param producerConfig Properties with the producer configuration. - * @param semantic Defines semantic that will be used by this producer (see {@link Semantic}). - * @deprecated use {@link #FlinkKafkaProducer(String, KafkaSerializationSchema, Properties, - * Semantic)} - */ - @Deprecated - public FlinkKafkaProducer( - String topicId, - KeyedSerializationSchema serializationSchema, - Properties producerConfig, - Semantic semantic) { - this( - topicId, - serializationSchema, - producerConfig, - Optional.of(new FlinkFixedPartitioner()), - semantic, - DEFAULT_KAFKA_PRODUCERS_POOL_SIZE); - } - - /** - * Creates a FlinkKafkaProducer for a given topic. The sink produces its input to the topic. It - * accepts a keyed {@link KeyedSerializationSchema} and possibly a custom {@link - * FlinkKafkaPartitioner}. - * - *

    If a partitioner is not provided, written records will be partitioned by the attached key - * of each record (as determined by {@link KeyedSerializationSchema#serializeKey(Object)}). If - * written records do not have a key (i.e., {@link - * KeyedSerializationSchema#serializeKey(Object)} returns {@code null}), they will be - * distributed to Kafka partitions in a round-robin fashion. - * - * @param defaultTopicId The default topic to write data to - * @param serializationSchema A serializable serialization schema for turning user objects into - * a kafka-consumable byte[] supporting key/value messages - * @param producerConfig Configuration properties for the KafkaProducer. 'bootstrap.servers.' is - * the only required argument. - * @param customPartitioner A serializable partitioner for assigning messages to Kafka - * partitions. If a partitioner is not provided, records will be partitioned by the key of - * each record (determined by {@link KeyedSerializationSchema#serializeKey(Object)}). If the - * keys are {@code null}, then records will be distributed to Kafka partitions in a - * round-robin fashion. - * @deprecated use {@link #FlinkKafkaProducer(String, KafkaSerializationSchema, Properties, - * Semantic)} - */ - @Deprecated - public FlinkKafkaProducer( - String defaultTopicId, - KeyedSerializationSchema serializationSchema, - Properties producerConfig, - Optional> customPartitioner) { - this( - defaultTopicId, - serializationSchema, - producerConfig, - customPartitioner, - Semantic.AT_LEAST_ONCE, - DEFAULT_KAFKA_PRODUCERS_POOL_SIZE); - } - - /** - * Creates a FlinkKafkaProducer for a given topic. The sink produces its input to the topic. It - * accepts a keyed {@link KeyedSerializationSchema} and possibly a custom {@link - * FlinkKafkaPartitioner}. - * - *

    If a partitioner is not provided, written records will be partitioned by the attached key - * of each record (as determined by {@link KeyedSerializationSchema#serializeKey(Object)}). If - * written records do not have a key (i.e., {@link - * KeyedSerializationSchema#serializeKey(Object)} returns {@code null}), they will be - * distributed to Kafka partitions in a round-robin fashion. - * - * @param defaultTopicId The default topic to write data to - * @param serializationSchema A serializable serialization schema for turning user objects into - * a kafka-consumable byte[] supporting key/value messages - * @param producerConfig Configuration properties for the KafkaProducer. 'bootstrap.servers.' is - * the only required argument. - * @param customPartitioner A serializable partitioner for assigning messages to Kafka - * partitions. If a partitioner is not provided, records will be partitioned by the key of - * each record (determined by {@link KeyedSerializationSchema#serializeKey(Object)}). If the - * keys are {@code null}, then records will be distributed to Kafka partitions in a - * round-robin fashion. - * @param semantic Defines semantic that will be used by this producer (see {@link Semantic}). - * @param kafkaProducersPoolSize Overwrite default KafkaProducers pool size (see {@link - * Semantic#EXACTLY_ONCE}). - * @deprecated use {@link #FlinkKafkaProducer(String, KafkaSerializationSchema, Properties, - * Semantic)} - */ - @Deprecated - public FlinkKafkaProducer( - String defaultTopicId, - KeyedSerializationSchema serializationSchema, - Properties producerConfig, - Optional> customPartitioner, - Semantic semantic, - int kafkaProducersPoolSize) { - this( - defaultTopicId, - serializationSchema, - customPartitioner.orElse(null), - null, /* kafka serialization schema */ - producerConfig, - semantic, - kafkaProducersPoolSize); - } - - /** - * Creates a {@link FlinkKafkaProducer} for a given topic. The sink produces its input to the - * topic. It accepts a {@link KafkaSerializationSchema} for serializing records to a {@link - * ProducerRecord}, including partitioning information. - * - * @param defaultTopic The default topic to write data to - * @param serializationSchema A serializable serialization schema for turning user objects into - * a kafka-consumable byte[] supporting key/value messages - * @param producerConfig Configuration properties for the KafkaProducer. 'bootstrap.servers.' is - * the only required argument. - * @param semantic Defines semantic that will be used by this producer (see {@link Semantic}). - */ - public FlinkKafkaProducer( - String defaultTopic, - KafkaSerializationSchema serializationSchema, - Properties producerConfig, - Semantic semantic) { - this( - defaultTopic, - serializationSchema, - producerConfig, - semantic, - DEFAULT_KAFKA_PRODUCERS_POOL_SIZE); - } - - /** - * Creates a FlinkKafkaProducer for a given topic. The sink produces its input to the topic. It - * accepts a {@link KafkaSerializationSchema} and possibly a custom {@link - * FlinkKafkaPartitioner}. - * - * @param defaultTopic The default topic to write data to - * @param serializationSchema A serializable serialization schema for turning user objects into - * a kafka-consumable byte[] supporting key/value messages - * @param producerConfig Configuration properties for the KafkaProducer. 'bootstrap.servers.' is - * the only required argument. - * @param semantic Defines semantic that will be used by this producer (see {@link Semantic}). 
- * @param kafkaProducersPoolSize Overwrite default KafkaProducers pool size (see {@link - * Semantic#EXACTLY_ONCE}). - */ - public FlinkKafkaProducer( - String defaultTopic, - KafkaSerializationSchema serializationSchema, - Properties producerConfig, - Semantic semantic, - int kafkaProducersPoolSize) { - this( - defaultTopic, - null, - null, /* keyed schema and FlinkKafkaPartitioner */ - serializationSchema, - producerConfig, - semantic, - kafkaProducersPoolSize); - } - - /** - * Creates a FlinkKafkaProducer for a given topic. The sink produces its input to the topic. It - * accepts a {@link KafkaSerializationSchema} and possibly a custom {@link - * FlinkKafkaPartitioner}. - * - *

    If a partitioner is not provided, written records will be partitioned by the attached key - * of each record (as determined by {@link KeyedSerializationSchema#serializeKey(Object)}). If - * written records do not have a key (i.e., {@link - * KeyedSerializationSchema#serializeKey(Object)} returns {@code null}), they will be - * distributed to Kafka partitions in a round-robin fashion. - * - * @param defaultTopic The default topic to write data to - * @param keyedSchema A serializable serialization schema for turning user objects into a - * kafka-consumable byte[] supporting key/value messages - * @param customPartitioner A serializable partitioner for assigning messages to Kafka - * partitions. If a partitioner is not provided, records will be partitioned by the key of - * each record (determined by {@link KeyedSerializationSchema#serializeKey(Object)}). If the - * keys are {@code null}, then records will be distributed to Kafka partitions in a - * round-robin fashion. - * @param kafkaSchema A serializable serialization schema for turning user objects into a - * kafka-consumable byte[] supporting key/value messages - * @param producerConfig Configuration properties for the KafkaProducer. 'bootstrap.servers.' is - * the only required argument. - * @param semantic Defines semantic that will be used by this producer (see {@link Semantic}). - * @param kafkaProducersPoolSize Overwrite default KafkaProducers pool size (see {@link - * Semantic#EXACTLY_ONCE}). - */ - private FlinkKafkaProducer( - String defaultTopic, - KeyedSerializationSchema keyedSchema, - FlinkKafkaPartitioner customPartitioner, - KafkaSerializationSchema kafkaSchema, - Properties producerConfig, - Semantic semantic, - int kafkaProducersPoolSize) { - super(new TransactionStateSerializer(), new ContextStateSerializer()); - - this.defaultTopicId = checkNotNull(defaultTopic, "defaultTopic is null"); - - if (kafkaSchema != null) { - this.keyedSchema = null; - this.kafkaSchema = kafkaSchema; - this.flinkKafkaPartitioner = null; - ClosureCleaner.clean( - this.kafkaSchema, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true); - - if (customPartitioner != null) { - throw new IllegalArgumentException( - "Customer partitioner can only be used when" - + "using a KeyedSerializationSchema or SerializationSchema."); - } - } else if (keyedSchema != null) { - this.kafkaSchema = null; - this.keyedSchema = keyedSchema; - this.flinkKafkaPartitioner = customPartitioner; - ClosureCleaner.clean( - this.flinkKafkaPartitioner, - ExecutionConfig.ClosureCleanerLevel.RECURSIVE, - true); - ClosureCleaner.clean( - this.keyedSchema, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true); - } else { - throw new IllegalArgumentException( - "You must provide either a KafkaSerializationSchema or a" - + "KeyedSerializationSchema."); - } - - this.producerConfig = checkNotNull(producerConfig, "producerConfig is null"); - this.semantic = checkNotNull(semantic, "semantic is null"); - this.kafkaProducersPoolSize = kafkaProducersPoolSize; - checkState(kafkaProducersPoolSize > 0, "kafkaProducersPoolSize must be non empty"); - - // set the producer configuration properties for kafka record key value serializers. 
- if (!producerConfig.containsKey(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG)) { - this.producerConfig.put( - ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, - ByteArraySerializer.class.getName()); - } else { - LOG.warn( - "Overwriting the '{}' is not recommended", - ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG); - } - - if (!producerConfig.containsKey(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG)) { - this.producerConfig.put( - ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, - ByteArraySerializer.class.getName()); - } else { - LOG.warn( - "Overwriting the '{}' is not recommended", - ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG); - } - - // eagerly ensure that bootstrap servers are set. - if (!this.producerConfig.containsKey(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG)) { - throw new IllegalArgumentException( - ProducerConfig.BOOTSTRAP_SERVERS_CONFIG - + " must be supplied in the producer config properties."); - } - - if (!producerConfig.containsKey(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG)) { - long timeout = DEFAULT_KAFKA_TRANSACTION_TIMEOUT.toMilliseconds(); - checkState( - timeout < Integer.MAX_VALUE && timeout > 0, - "timeout does not fit into 32 bit integer"); - this.producerConfig.put(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, (int) timeout); - LOG.warn( - "Property [{}] not specified. Setting it to {}", - ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, - DEFAULT_KAFKA_TRANSACTION_TIMEOUT); - } - - // Enable transactionTimeoutWarnings to avoid silent data loss - // See KAFKA-6119 (affects versions 0.11.0.0 and 0.11.0.1): - // The KafkaProducer may not throw an exception if the transaction failed to commit - if (semantic == Semantic.EXACTLY_ONCE) { - final long transactionTimeout = getTransactionTimeout(producerConfig); - super.setTransactionTimeout(transactionTimeout); - super.enableTransactionTimeoutWarnings(0.8); - } - - this.topicPartitionsMap = new HashMap<>(); - } - - // ---------------------------------- Properties -------------------------- - - /** - * If set to true, Flink will write the (event time) timestamp attached to each record into - * Kafka. Timestamps must be positive for Kafka to accept them. - * - * @param writeTimestampToKafka Flag indicating if Flink's internal timestamps are written to - * Kafka. - */ - public void setWriteTimestampToKafka(boolean writeTimestampToKafka) { - this.writeTimestampToKafka = writeTimestampToKafka; - if (kafkaSchema instanceof KafkaSerializationSchemaWrapper) { - ((KafkaSerializationSchemaWrapper) kafkaSchema) - .setWriteTimestamp(writeTimestampToKafka); - } - } - - /** - * Defines whether the producer should fail on errors, or only log them. If this is set to true, - * then exceptions will be only logged, if set to false, exceptions will be eventually thrown - * and cause the streaming program to fail (and enter recovery). - * - * @param logFailuresOnly The flag to indicate logging-only on exceptions. - */ - public void setLogFailuresOnly(boolean logFailuresOnly) { - this.logFailuresOnly = logFailuresOnly; - } - - /** - * Specifies the prefix of the transactional.id property to be used by the producers when - * communicating with Kafka. If not set, the transactional.id will be prefixed with {@code - * taskName + "-" + operatorUid}. - * - *
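
As an illustration of the checks and defaults applied in the constructor above: only 'bootstrap.servers' must be supplied, while the key/value serializers and 'transaction.timeout.ms' are filled in automatically when absent. Topic name and broker address below are placeholders.

    import org.apache.flink.api.common.serialization.SimpleStringSchema;

    import java.util.Properties;

    Properties props = new Properties();
    props.setProperty("bootstrap.servers", "localhost:9092");  // the only mandatory setting

    FlinkKafkaProducer<String> producer =
            new FlinkKafkaProducer<>(
                    "output-topic",               // default target topic
                    new SimpleStringSchema(),     // key-less SerializationSchema
                    props);                       // AT_LEAST_ONCE semantic by default
    producer.setWriteTimestampToKafka(true);      // optionally forward event-time timestamps
    producer.setLogFailuresOnly(false);           // fail the job on asynchronous send errors
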

    Note that, if we change the prefix when the Flink application previously failed before - * first checkpoint completed or we are starting new batch of {@link FlinkKafkaProducer} from - * scratch without clean shutdown of the previous one, since we don't know what was the - * previously used transactional.id prefix, there will be some lingering transactions left. - * - * @param transactionalIdPrefix the transactional.id prefix - * @throws NullPointerException Thrown, if the transactionalIdPrefix was null. - */ - public void setTransactionalIdPrefix(String transactionalIdPrefix) { - this.transactionalIdPrefix = checkNotNull(transactionalIdPrefix); - } - - /** - * Disables the propagation of exceptions thrown when committing presumably timed out Kafka - * transactions during recovery of the job. If a Kafka transaction is timed out, a commit will - * never be successful. Hence, use this feature to avoid recovery loops of the Job. Exceptions - * will still be logged to inform the user that data loss might have occurred. - * - *
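
A short usage sketch for setTransactionalIdPrefix and ignoreFailuresAfterTransactionTimeout; the prefix value is a placeholder and 'producer' is assumed to be an already constructed FlinkKafkaProducer.

    producer.setTransactionalIdPrefix("orders-pipeline");  // keep this stable across restarts
    producer.ignoreFailuresAfterTransactionTimeout();      // accept possible data loss instead of
                                                           // recovery loops on timed-out transactions
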

    Note that we use {@link System#currentTimeMillis()} to track the age of a transaction. - * Moreover, only exceptions thrown during the recovery are caught, i.e., the producer will - * attempt at least one commit of the transaction before giving up. - */ - @Override - public FlinkKafkaProducer ignoreFailuresAfterTransactionTimeout() { - super.ignoreFailuresAfterTransactionTimeout(); - return this; - } - - // ----------------------------------- Utilities -------------------------- - - /** Initializes the connection to Kafka. */ - @Override - public void open(Configuration configuration) throws Exception { - if (logFailuresOnly) { - callback = - new Callback() { - @Override - public void onCompletion(RecordMetadata metadata, Exception e) { - if (e != null) { - LOG.error( - "Error while sending record to Kafka: " + e.getMessage(), - e); - } - acknowledgeMessage(); - } - }; - } else { - callback = - new Callback() { - @Override - public void onCompletion(RecordMetadata metadata, Exception exception) { - if (exception != null && asyncException == null) { - asyncException = exception; - } - acknowledgeMessage(); - } - }; - } - - RuntimeContext ctx = getRuntimeContext(); - - if (flinkKafkaPartitioner != null) { - flinkKafkaPartitioner.open( - ctx.getIndexOfThisSubtask(), ctx.getNumberOfParallelSubtasks()); - } - - if (kafkaSchema instanceof KafkaContextAware) { - KafkaContextAware contextAwareSchema = (KafkaContextAware) kafkaSchema; - contextAwareSchema.setParallelInstanceId(ctx.getIndexOfThisSubtask()); - contextAwareSchema.setNumParallelInstances(ctx.getNumberOfParallelSubtasks()); - } - - if (kafkaSchema != null) { - kafkaSchema.open( - RuntimeContextInitializationContextAdapters.serializationAdapter( - getRuntimeContext(), metricGroup -> metricGroup.addGroup("user"))); - } - - super.open(configuration); - } - - @Override - public void invoke(KafkaTransactionState transaction, IN next, Context context) - throws FlinkKafkaException { - checkErroneous(); - - ProducerRecord record; - if (keyedSchema != null) { - byte[] serializedKey = keyedSchema.serializeKey(next); - byte[] serializedValue = keyedSchema.serializeValue(next); - String targetTopic = keyedSchema.getTargetTopic(next); - if (targetTopic == null) { - targetTopic = defaultTopicId; - } - - Long timestamp = null; - if (this.writeTimestampToKafka) { - timestamp = context.timestamp(); - } - - int[] partitions = topicPartitionsMap.get(targetTopic); - if (null == partitions) { - partitions = getPartitionsByTopic(targetTopic, transaction.producer); - topicPartitionsMap.put(targetTopic, partitions); - } - if (flinkKafkaPartitioner != null) { - record = - new ProducerRecord<>( - targetTopic, - flinkKafkaPartitioner.partition( - next, - serializedKey, - serializedValue, - targetTopic, - partitions), - timestamp, - serializedKey, - serializedValue); - } else { - record = - new ProducerRecord<>( - targetTopic, null, timestamp, serializedKey, serializedValue); - } - } else if (kafkaSchema != null) { - if (kafkaSchema instanceof KafkaContextAware) { - @SuppressWarnings("unchecked") - KafkaContextAware contextAwareSchema = (KafkaContextAware) kafkaSchema; - - String targetTopic = contextAwareSchema.getTargetTopic(next); - if (targetTopic == null) { - targetTopic = defaultTopicId; - } - int[] partitions = topicPartitionsMap.get(targetTopic); - - if (null == partitions) { - partitions = getPartitionsByTopic(targetTopic, transaction.producer); - topicPartitionsMap.put(targetTopic, partitions); - } - - contextAwareSchema.setPartitions(partitions); - 
} - record = kafkaSchema.serialize(next, context.timestamp()); - } else { - throw new RuntimeException( - "We have neither KafkaSerializationSchema nor KeyedSerializationSchema, this" - + "is a bug."); - } - - pendingRecords.incrementAndGet(); - transaction.producer.send(record, callback); - } - - @Override - public void close() throws FlinkKafkaException { - // First close the producer for current transaction. - try { - final KafkaTransactionState currentTransaction = currentTransaction(); - LOG.error( - "Closing producer for current transaction: {} {}", - currentTransaction, - currentTransaction != null ? currentTransaction.producer : null); - if (currentTransaction != null) { - // to avoid exceptions on aborting transactions with some pending records - flush(currentTransaction); - - // normal abort for AT_LEAST_ONCE and NONE do not clean up resources because of - // producer reusing, thus - // we need to close it manually - switch (semantic) { - case EXACTLY_ONCE: - break; - case AT_LEAST_ONCE: - case NONE: - currentTransaction.producer.flush(); - currentTransaction.producer.close(Duration.ofSeconds(0)); - break; - } - } - super.close(); - } catch (Exception e) { - asyncException = ExceptionUtils.firstOrSuppressed(e, asyncException); - } finally { - // We may have to close producer of the current transaction in case some exception was - // thrown before - // the normal close routine finishes. - if (currentTransaction() != null) { - try { - currentTransaction().producer.close(Duration.ofSeconds(0)); - } catch (Throwable t) { - LOG.warn("Error closing producer.", t); - } - } - // Make sure all the producers for pending transactions are closed. - pendingTransactions() - .forEach( - transaction -> { - try { - transaction.getValue().producer.close(Duration.ofSeconds(0)); - } catch (Throwable t) { - LOG.warn("Error closing producer.", t); - } - }); - // make sure we propagate pending errors - checkErroneous(); - } - } - - // ------------------- Logic for handling checkpoint flushing -------------------------- // - - @Override - protected KafkaTransactionState beginTransaction() throws FlinkKafkaException { - switch (semantic) { - case EXACTLY_ONCE: - FlinkKafkaInternalProducer producer = createTransactionalProducer(); - producer.beginTransaction(); - return new KafkaTransactionState(producer.getTransactionalId(), producer); - case AT_LEAST_ONCE: - case NONE: - // Do not create new producer on each beginTransaction() if it is not necessary - final KafkaTransactionState currentTransaction = currentTransaction(); - if (currentTransaction != null && currentTransaction.producer != null) { - return new KafkaTransactionState(currentTransaction.producer); - } - return new KafkaTransactionState(initNonTransactionalProducer(true)); - default: - throw new UnsupportedOperationException("Not implemented semantic"); - } - } - - @Override - protected void preCommit(KafkaTransactionState transaction) throws FlinkKafkaException { - switch (semantic) { - case EXACTLY_ONCE: - case AT_LEAST_ONCE: - flush(transaction); - break; - case NONE: - break; - default: - throw new UnsupportedOperationException("Not implemented semantic"); - } - checkErroneous(); - } - - @Override - protected void commit(KafkaTransactionState transaction) { - if (transaction.isTransactional()) { - try { - transaction.producer.commitTransaction(); - } finally { - recycleTransactionalProducer(transaction.producer); - } - } - } - - @Override - protected void recoverAndCommit(KafkaTransactionState transaction) { - if 
(transaction.isTransactional()) { - FlinkKafkaInternalProducer producer = null; - try { - producer = initTransactionalProducer(transaction.transactionalId, false); - producer.resumeTransaction(transaction.producerId, transaction.epoch); - producer.commitTransaction(); - } catch (InvalidTxnStateException e) { - LOG.warn( - "Unable to commit recovered transaction ({}) because it's in an invalid state. " - + "Most likely the transaction has been aborted for some reason. Please check the Kafka logs for more details.", - transaction, - e); - } catch (ProducerFencedException e) { - LOG.warn( - "Unable to commit recovered transaction ({}) because its producer is already fenced." - + " This means that you either have a different producer with the same '{}' or" - + " recovery took longer than '{}' ({}ms). In both cases this most likely signals data loss," - + " please consult the Flink documentation for more details.", - transaction, - ProducerConfig.TRANSACTIONAL_ID_CONFIG, - ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, - getTransactionTimeout(producerConfig), - e); - } finally { - if (producer != null) { - producer.close(Duration.ofSeconds(0)); - } - } - } - } - - @Override - protected void abort(KafkaTransactionState transaction) { - if (transaction.isTransactional()) { - transaction.producer.abortTransaction(); - recycleTransactionalProducer(transaction.producer); - } - } - - @Override - protected void recoverAndAbort(KafkaTransactionState transaction) { - if (transaction.isTransactional()) { - FlinkKafkaInternalProducer producer = null; - try { - producer = initTransactionalProducer(transaction.transactionalId, false); - producer.initTransactions(); - } finally { - if (producer != null) { - producer.close(Duration.ofSeconds(0)); - } - } - } - } - - /** - * ATTENTION to subclass implementors: When overriding this method, please always call - * {@code super.acknowledgeMessage()} to keep the invariants of the internal bookkeeping of the - * producer. If not, be sure to know what you are doing. - */ - protected void acknowledgeMessage() { - pendingRecords.decrementAndGet(); - } - - /** - * Flush pending records. - * - * @param transaction - */ - private void flush(KafkaTransactionState transaction) throws FlinkKafkaException { - if (transaction.producer != null) { - transaction.producer.flush(); - } - long pendingRecordsCount = pendingRecords.get(); - if (pendingRecordsCount != 0) { - throw new IllegalStateException( - "Pending record count must be zero at this point: " + pendingRecordsCount); - } - - // if the flushed requests has errors, we should propagate it also and fail the checkpoint - checkErroneous(); - } - - @Override - public void snapshotState(FunctionSnapshotContext context) throws Exception { - super.snapshotState(context); - - nextTransactionalIdHintState.clear(); - // To avoid duplication only first subtask keeps track of next transactional id hint. - // Otherwise all of the - // subtasks would write exactly same information. - if (getRuntimeContext().getIndexOfThisSubtask() == 0 && semantic == Semantic.EXACTLY_ONCE) { - checkState( - nextTransactionalIdHint != null, - "nextTransactionalIdHint must be set for EXACTLY_ONCE"); - long nextFreeTransactionalId = nextTransactionalIdHint.nextFreeTransactionalId; - - // If we scaled up, some (unknown) subtask must have created new transactional ids from - // scratch. In that - // case we adjust nextFreeTransactionalId by the range of transactionalIds that could be - // used for this - // scaling up. 
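    // (Worked illustration with hypothetical values: if the previous run used
    //  lastParallelism = 2 and the job now runs with 4 subtasks and a pool size of 5,
    //  the hint below is advanced by 4 * 5 = 20 ids, so ids that the newly added subtasks
    //  may have generated from scratch are never handed out again.)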
- if (getRuntimeContext().getNumberOfParallelSubtasks() - > nextTransactionalIdHint.lastParallelism) { - nextFreeTransactionalId += - getRuntimeContext().getNumberOfParallelSubtasks() * kafkaProducersPoolSize; - } - - nextTransactionalIdHintState.add( - new NextTransactionalIdHint( - getRuntimeContext().getNumberOfParallelSubtasks(), - nextFreeTransactionalId)); - } - } - - @Override - public void initializeState(FunctionInitializationContext context) throws Exception { - if (semantic != Semantic.NONE - && !((StreamingRuntimeContext) this.getRuntimeContext()).isCheckpointingEnabled()) { - LOG.warn( - "Using {} semantic, but checkpointing is not enabled. Switching to {} semantic.", - semantic, - Semantic.NONE); - semantic = Semantic.NONE; - } - - nextTransactionalIdHintState = - context.getOperatorStateStore() - .getUnionListState(NEXT_TRANSACTIONAL_ID_HINT_DESCRIPTOR_V2); - - if (context.getOperatorStateStore() - .getRegisteredStateNames() - .contains(NEXT_TRANSACTIONAL_ID_HINT_DESCRIPTOR)) { - migrateNextTransactionalIdHindState(context); - } - - String actualTransactionalIdPrefix; - if (this.transactionalIdPrefix != null) { - actualTransactionalIdPrefix = this.transactionalIdPrefix; - } else { - String taskName = getRuntimeContext().getTaskName(); - // Kafka transactional IDs are limited in length to be less than the max value of - // a short, so we truncate here if necessary to a more reasonable length string. - if (taskName.length() > maxTaskNameSize) { - taskName = taskName.substring(0, maxTaskNameSize); - LOG.warn( - "Truncated task name for Kafka TransactionalId from {} to {}.", - getRuntimeContext().getTaskName(), - taskName); - } - actualTransactionalIdPrefix = - taskName - + "-" - + ((StreamingRuntimeContext) getRuntimeContext()).getOperatorUniqueID(); - } - transactionalIdsGenerator = - new TransactionalIdsGenerator( - actualTransactionalIdPrefix, - getRuntimeContext().getIndexOfThisSubtask(), - getRuntimeContext().getNumberOfParallelSubtasks(), - kafkaProducersPoolSize, - SAFE_SCALE_DOWN_FACTOR); - - if (semantic != Semantic.EXACTLY_ONCE) { - nextTransactionalIdHint = null; - } else { - List transactionalIdHints = new ArrayList<>(); - nextTransactionalIdHintState.get().forEach(transactionalIdHints::add); - - if (transactionalIdHints.size() > 1) { - throw new IllegalStateException( - "There should be at most one next transactional id hint written by the first subtask"); - } else if (transactionalIdHints.size() == 0) { - nextTransactionalIdHint = new NextTransactionalIdHint(0, 0); - - // this means that this is either: - // (1) the first execution of this application - // (2) previous execution has failed before first checkpoint completed - // - // in case of (2) we have to abort all previous transactions - abortTransactions(transactionalIdsGenerator.generateIdsToAbort()); - } else { - nextTransactionalIdHint = transactionalIdHints.get(0); - } - } - - super.initializeState(context); - } - - @Override - protected Optional initializeUserContext() { - if (semantic != Semantic.EXACTLY_ONCE) { - return Optional.empty(); - } - - Set transactionalIds = generateNewTransactionalIds(); - resetAvailableTransactionalIdsPool(transactionalIds); - return Optional.of(new KafkaTransactionContext(transactionalIds)); - } - - private Set generateNewTransactionalIds() { - checkState( - nextTransactionalIdHint != null, - "nextTransactionalIdHint must be present for EXACTLY_ONCE"); - - Set transactionalIds = - transactionalIdsGenerator.generateIdsToUse( - 
nextTransactionalIdHint.nextFreeTransactionalId); - LOG.info("Generated new transactionalIds {}", transactionalIds); - return transactionalIds; - } - - @Override - protected void finishProcessing(@Nullable KafkaTransactionState transaction) { - super.finishProcessing(transaction); - // TwoPhaseCommitSink sets transaction = null on final checkpoint and thus closing will leak - // the producer. For transactional producers, we track the producer in pendingTransactions. - if (transaction != null && !transaction.isTransactional()) { - transaction.producer.flush(); - transaction.producer.close(Duration.ZERO); - } - } - - @Override - protected void finishRecoveringContext(Collection handledTransactions) { - cleanUpUserContext(handledTransactions); - resetAvailableTransactionalIdsPool(getUserContext().get().transactionalIds); - LOG.info("Recovered transactionalIds {}", getUserContext().get().transactionalIds); - } - - protected FlinkKafkaInternalProducer createProducer() { - return new FlinkKafkaInternalProducer<>(this.producerConfig); - } - - /** - * After initialization make sure that all previous transactions from the current user context - * have been completed. - * - * @param handledTransactions transactions which were already committed or aborted and do not - * need further handling - */ - private void cleanUpUserContext(Collection handledTransactions) { - if (!getUserContext().isPresent()) { - return; - } - HashSet abortTransactions = new HashSet<>(getUserContext().get().transactionalIds); - handledTransactions.forEach( - kafkaTransactionState -> - abortTransactions.remove(kafkaTransactionState.transactionalId)); - abortTransactions(abortTransactions); - } - - private void resetAvailableTransactionalIdsPool(Collection transactionalIds) { - availableTransactionalIds.clear(); - availableTransactionalIds.addAll(transactionalIds); - } - - // ----------------------------------- Utilities -------------------------- - - private void abortTransactions(final Set transactionalIds) { - final ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); - transactionalIds - .parallelStream() - .forEach( - transactionalId -> { - // The parallelStream executes the consumer in a separated thread pool. - // Because the consumer(e.g. Kafka) uses the context classloader to - // construct some class - // we should set the correct classloader for it. - try (TemporaryClassLoaderContext ignored = - TemporaryClassLoaderContext.of(classLoader)) { - // don't mess with the original configuration or any other - // properties of the - // original object - // -> create an internal kafka producer on our own and do not rely - // on - // initTransactionalProducer(). 
- final Properties myConfig = new Properties(); - myConfig.putAll(producerConfig); - initTransactionalProducerConfig(myConfig, transactionalId); - FlinkKafkaInternalProducer kafkaProducer = null; - try { - kafkaProducer = new FlinkKafkaInternalProducer<>(myConfig); - // it suffices to call initTransactions - this will abort any - // lingering transactions - kafkaProducer.initTransactions(); - } finally { - if (kafkaProducer != null) { - kafkaProducer.close(Duration.ofSeconds(0)); - } - } - } - }); - } - - int getTransactionCoordinatorId() { - final KafkaTransactionState currentTransaction = currentTransaction(); - if (currentTransaction == null || currentTransaction.producer == null) { - throw new IllegalArgumentException(); - } - return currentTransaction.producer.getTransactionCoordinatorId(); - } - - @VisibleForTesting - String getTransactionalId() { - final KafkaTransactionState currentTransaction = currentTransaction(); - if (currentTransaction == null || currentTransaction.producer == null) { - throw new IllegalArgumentException(); - } - return currentTransaction.producer.getTransactionalId(); - } - - /** - * For each checkpoint we create new {@link FlinkKafkaInternalProducer} so that new transactions - * will not clash with transactions created during previous checkpoints ({@code - * producer.initTransactions()} assures that we obtain new producerId and epoch counters). - */ - private FlinkKafkaInternalProducer createTransactionalProducer() - throws FlinkKafkaException { - String transactionalId = availableTransactionalIds.poll(); - if (transactionalId == null) { - throw new FlinkKafkaException( - FlinkKafkaErrorCode.PRODUCERS_POOL_EMPTY, - "Too many ongoing snapshots. Increase kafka producers pool size or decrease number of concurrent checkpoints."); - } - FlinkKafkaInternalProducer producer = - initTransactionalProducer(transactionalId, true); - producer.initTransactions(); - return producer; - } - - private void recycleTransactionalProducer(FlinkKafkaInternalProducer producer) { - availableTransactionalIds.add(producer.getTransactionalId()); - producer.flush(); - producer.close(Duration.ofSeconds(0)); - } - - private FlinkKafkaInternalProducer initTransactionalProducer( - String transactionalId, boolean registerMetrics) { - initTransactionalProducerConfig(producerConfig, transactionalId); - return initProducer(registerMetrics); - } - - private static void initTransactionalProducerConfig( - Properties producerConfig, String transactionalId) { - producerConfig.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, transactionalId); - } - - private FlinkKafkaInternalProducer initNonTransactionalProducer( - boolean registerMetrics) { - producerConfig.remove(ProducerConfig.TRANSACTIONAL_ID_CONFIG); - return initProducer(registerMetrics); - } - - private FlinkKafkaInternalProducer initProducer(boolean registerMetrics) { - FlinkKafkaInternalProducer producer = createProducer(); - - LOG.info( - "Starting FlinkKafkaInternalProducer ({}/{}) to produce into default topic {}", - getRuntimeContext().getIndexOfThisSubtask() + 1, - getRuntimeContext().getNumberOfParallelSubtasks(), - defaultTopicId); - - // register Kafka metrics to Flink accumulators - if (registerMetrics - && !Boolean.parseBoolean( - producerConfig.getProperty(KEY_DISABLE_METRICS, "false"))) { - Map metrics = producer.metrics(); - - if (metrics == null) { - // MapR's Kafka implementation returns null here. 
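abortTransactions() above relies on exactly that property: constructing a producer with a given transactional.id and calling initTransactions() is enough to abort any transaction left open under that id. A condensed sketch of the pattern, with hypothetical ids and a placeholder broker standing in for what TransactionalIdsGenerator.generateIdsToAbort() would produce:

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.ByteArraySerializer;

import java.time.Duration;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

public final class AbortLingeringTransactionsSketch {
    public static void main(String[] args) {
        // Hypothetical ids; in the sink they come from TransactionalIdsGenerator.generateIdsToAbort().
        List<String> idsToAbort = Arrays.asList("my-sink-0-0", "my-sink-0-1");
        for (String transactionalId : idsToAbort) {
            Properties props = new Properties();
            props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder broker
            props.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, transactionalId);
            props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
            props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());

            KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(props);
            try {
                // Claiming the transactional.id aborts any unfinished transaction for it
                // and bumps the epoch, fencing whatever producer used it before.
                producer.initTransactions();
            } finally {
                producer.close(Duration.ofSeconds(0));
            }
        }
    }
}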
- LOG.info("Producer implementation does not support metrics"); - } else { - final MetricGroup kafkaMetricGroup = - getRuntimeContext().getMetricGroup().addGroup("KafkaProducer"); - for (Map.Entry entry : metrics.entrySet()) { - String name = entry.getKey().name(); - Metric metric = entry.getValue(); - - KafkaMetricMutableWrapper wrapper = previouslyCreatedMetrics.get(name); - if (wrapper != null) { - wrapper.setKafkaMetric(metric); - } else { - // TODO: somehow merge metrics from all active producers? - wrapper = new KafkaMetricMutableWrapper(metric); - previouslyCreatedMetrics.put(name, wrapper); - kafkaMetricGroup.gauge(name, wrapper); - } - } - } - } - LOG.error("InitProducer {} {}", producerConfig, producer); - return producer; - } - - protected void checkErroneous() throws FlinkKafkaException { - Exception e = asyncException; - if (e != null) { - // prevent double throwing - asyncException = null; - throw new FlinkKafkaException( - FlinkKafkaErrorCode.EXTERNAL_ERROR, - "Failed to send data to Kafka: " + e.getMessage(), - e); - } - } - - private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { - in.defaultReadObject(); - } - - private void migrateNextTransactionalIdHindState(FunctionInitializationContext context) - throws Exception { - ListState oldNextTransactionalIdHintState = - context.getOperatorStateStore() - .getUnionListState(NEXT_TRANSACTIONAL_ID_HINT_DESCRIPTOR); - nextTransactionalIdHintState = - context.getOperatorStateStore() - .getUnionListState(NEXT_TRANSACTIONAL_ID_HINT_DESCRIPTOR_V2); - - List oldTransactionalIdHints = new ArrayList<>(); - oldNextTransactionalIdHintState.get().forEach(oldTransactionalIdHints::add); - - if (!oldTransactionalIdHints.isEmpty()) { - nextTransactionalIdHintState.addAll(oldTransactionalIdHints); - // clear old state - oldNextTransactionalIdHintState.clear(); - } - } - - private static Properties getPropertiesFromBrokerList(String brokerList) { - String[] elements = brokerList.split(","); - - // validate the broker addresses - for (String broker : elements) { - NetUtils.getCorrectHostnamePort(broker); - } - - Properties props = new Properties(); - props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList); - return props; - } - - protected static int[] getPartitionsByTopic(String topic, Producer producer) { - // the fetched list is immutable, so we're creating a mutable copy in order to sort it - List partitionsList = new ArrayList<>(producer.partitionsFor(topic)); - - // sort the partitions by partition id to make sure the fetched partition list is the same - // across subtasks - Collections.sort( - partitionsList, - new Comparator() { - @Override - public int compare(PartitionInfo o1, PartitionInfo o2) { - return Integer.compare(o1.partition(), o2.partition()); - } - }); - - int[] partitions = new int[partitionsList.size()]; - for (int i = 0; i < partitions.length; i++) { - partitions[i] = partitionsList.get(i).partition(); - } - - return partitions; - } - - public static long getTransactionTimeout(Properties producerConfig) { - final Object object = producerConfig.get(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG); - if (object instanceof String && StringUtils.isNumeric((String) object)) { - return Long.parseLong((String) object); - } else if (object instanceof Number) { - return ((Number) object).longValue(); - } else { - throw new IllegalArgumentException( - ProducerConfig.TRANSACTION_TIMEOUT_CONFIG + " must be numeric, was " + object); - } - } - - /** State for handling transactions. 
*/ - @VisibleForTesting - @Internal - public static class KafkaTransactionState { - - private final transient FlinkKafkaInternalProducer producer; - - @Nullable final String transactionalId; - - final long producerId; - - final short epoch; - - @VisibleForTesting - public KafkaTransactionState( - String transactionalId, FlinkKafkaInternalProducer producer) { - this(transactionalId, producer.getProducerId(), producer.getEpoch(), producer); - } - - @VisibleForTesting - public KafkaTransactionState(FlinkKafkaInternalProducer producer) { - this(null, -1, (short) -1, producer); - } - - @VisibleForTesting - public KafkaTransactionState( - @Nullable String transactionalId, - long producerId, - short epoch, - FlinkKafkaInternalProducer producer) { - this.transactionalId = transactionalId; - this.producerId = producerId; - this.epoch = epoch; - this.producer = producer; - } - - boolean isTransactional() { - return transactionalId != null; - } - - public FlinkKafkaInternalProducer getProducer() { - return producer; - } - - @Override - public String toString() { - return String.format( - "%s [transactionalId=%s, producerId=%s, epoch=%s]", - this.getClass().getSimpleName(), transactionalId, producerId, epoch); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - KafkaTransactionState that = (KafkaTransactionState) o; - - if (producerId != that.producerId) { - return false; - } - if (epoch != that.epoch) { - return false; - } - return transactionalId != null - ? transactionalId.equals(that.transactionalId) - : that.transactionalId == null; - } - - @Override - public int hashCode() { - int result = transactionalId != null ? transactionalId.hashCode() : 0; - result = 31 * result + (int) (producerId ^ (producerId >>> 32)); - result = 31 * result + (int) epoch; - return result; - } - } - - /** - * Context associated to this instance of the {@link FlinkKafkaProducer}. User for keeping track - * of the transactionalIds. - */ - @VisibleForTesting - @Internal - public static class KafkaTransactionContext { - final Set transactionalIds; - - @VisibleForTesting - public KafkaTransactionContext(Set transactionalIds) { - checkNotNull(transactionalIds); - this.transactionalIds = transactionalIds; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - KafkaTransactionContext that = (KafkaTransactionContext) o; - - return transactionalIds.equals(that.transactionalIds); - } - - @Override - public int hashCode() { - return transactionalIds.hashCode(); - } - } - - /** {@link TypeSerializer} for {@link KafkaTransactionState}. 
*/ - @VisibleForTesting - @Internal - public static class TransactionStateSerializer - extends TypeSerializerSingleton { - - private static final long serialVersionUID = 1L; - - @Override - public boolean isImmutableType() { - return true; - } - - @Override - public KafkaTransactionState createInstance() { - return null; - } - - @Override - public KafkaTransactionState copy(KafkaTransactionState from) { - return from; - } - - @Override - public KafkaTransactionState copy(KafkaTransactionState from, KafkaTransactionState reuse) { - return from; - } - - @Override - public int getLength() { - return -1; - } - - @Override - public void serialize(KafkaTransactionState record, DataOutputView target) - throws IOException { - if (record.transactionalId == null) { - target.writeBoolean(false); - } else { - target.writeBoolean(true); - target.writeUTF(record.transactionalId); - } - target.writeLong(record.producerId); - target.writeShort(record.epoch); - } - - @Override - public KafkaTransactionState deserialize(DataInputView source) throws IOException { - String transactionalId = null; - if (source.readBoolean()) { - transactionalId = source.readUTF(); - } - long producerId = source.readLong(); - short epoch = source.readShort(); - return new KafkaTransactionState(transactionalId, producerId, epoch, null); - } - - @Override - public KafkaTransactionState deserialize(KafkaTransactionState reuse, DataInputView source) - throws IOException { - return deserialize(source); - } - - @Override - public void copy(DataInputView source, DataOutputView target) throws IOException { - boolean hasTransactionalId = source.readBoolean(); - target.writeBoolean(hasTransactionalId); - if (hasTransactionalId) { - target.writeUTF(source.readUTF()); - } - target.writeLong(source.readLong()); - target.writeShort(source.readShort()); - } - - // ----------------------------------------------------------------------------------- - - @Override - public TypeSerializerSnapshot snapshotConfiguration() { - return new TransactionStateSerializerSnapshot(); - } - - /** Serializer configuration snapshot for compatibility and format evolution. */ - @SuppressWarnings("WeakerAccess") - public static final class TransactionStateSerializerSnapshot - extends SimpleTypeSerializerSnapshot { - - public TransactionStateSerializerSnapshot() { - super(TransactionStateSerializer::new); - } - } - } - - /** {@link TypeSerializer} for {@link KafkaTransactionContext}. 
*/ - @VisibleForTesting - @Internal - public static class ContextStateSerializer - extends TypeSerializerSingleton { - - private static final long serialVersionUID = 1L; - - @Override - public boolean isImmutableType() { - return true; - } - - @Override - public KafkaTransactionContext createInstance() { - return null; - } - - @Override - public KafkaTransactionContext copy(KafkaTransactionContext from) { - return from; - } - - @Override - public KafkaTransactionContext copy( - KafkaTransactionContext from, KafkaTransactionContext reuse) { - return from; - } - - @Override - public int getLength() { - return -1; - } - - @Override - public void serialize(KafkaTransactionContext record, DataOutputView target) - throws IOException { - int numIds = record.transactionalIds.size(); - target.writeInt(numIds); - for (String id : record.transactionalIds) { - target.writeUTF(id); - } - } - - @Override - public KafkaTransactionContext deserialize(DataInputView source) throws IOException { - int numIds = source.readInt(); - Set ids = new HashSet<>(numIds); - for (int i = 0; i < numIds; i++) { - ids.add(source.readUTF()); - } - return new KafkaTransactionContext(ids); - } - - @Override - public KafkaTransactionContext deserialize( - KafkaTransactionContext reuse, DataInputView source) throws IOException { - return deserialize(source); - } - - @Override - public void copy(DataInputView source, DataOutputView target) throws IOException { - int numIds = source.readInt(); - target.writeInt(numIds); - for (int i = 0; i < numIds; i++) { - target.writeUTF(source.readUTF()); - } - } - - // ----------------------------------------------------------------------------------- - - @Override - public TypeSerializerSnapshot snapshotConfiguration() { - return new ContextStateSerializerSnapshot(); - } - - /** Serializer configuration snapshot for compatibility and format evolution. */ - @SuppressWarnings("WeakerAccess") - public static final class ContextStateSerializerSnapshot - extends SimpleTypeSerializerSnapshot { - - public ContextStateSerializerSnapshot() { - super(ContextStateSerializer::new); - } - } - } - - /** Keep information required to deduce next safe to use transactional id. */ - public static class NextTransactionalIdHint { - public int lastParallelism = 0; - public long nextFreeTransactionalId = 0; - - public NextTransactionalIdHint() { - this(0, 0); - } - - public NextTransactionalIdHint(int parallelism, long nextFreeTransactionalId) { - this.lastParallelism = parallelism; - this.nextFreeTransactionalId = nextFreeTransactionalId; - } - - @Override - public String toString() { - return "NextTransactionalIdHint[" - + "lastParallelism=" - + lastParallelism - + ", nextFreeTransactionalId=" - + nextFreeTransactionalId - + ']'; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - NextTransactionalIdHint that = (NextTransactionalIdHint) o; - - if (lastParallelism != that.lastParallelism) { - return false; - } - return nextFreeTransactionalId == that.nextFreeTransactionalId; - } - - @Override - public int hashCode() { - int result = lastParallelism; - result = - 31 * result - + (int) (nextFreeTransactionalId ^ (nextFreeTransactionalId >>> 32)); - return result; - } - } - - /** {@link TypeSerializer} for {@link NextTransactionalIdHint}. 
*/ - @VisibleForTesting - @Internal - public static class NextTransactionalIdHintSerializer - extends TypeSerializerSingleton { - - private static final long serialVersionUID = 1L; - - @Override - public boolean isImmutableType() { - return true; - } - - @Override - public NextTransactionalIdHint createInstance() { - return new NextTransactionalIdHint(); - } - - @Override - public NextTransactionalIdHint copy(NextTransactionalIdHint from) { - return from; - } - - @Override - public NextTransactionalIdHint copy( - NextTransactionalIdHint from, NextTransactionalIdHint reuse) { - return from; - } - - @Override - public int getLength() { - return Long.BYTES + Integer.BYTES; - } - - @Override - public void serialize(NextTransactionalIdHint record, DataOutputView target) - throws IOException { - target.writeLong(record.nextFreeTransactionalId); - target.writeInt(record.lastParallelism); - } - - @Override - public NextTransactionalIdHint deserialize(DataInputView source) throws IOException { - long nextFreeTransactionalId = source.readLong(); - int lastParallelism = source.readInt(); - return new NextTransactionalIdHint(lastParallelism, nextFreeTransactionalId); - } - - @Override - public NextTransactionalIdHint deserialize( - NextTransactionalIdHint reuse, DataInputView source) throws IOException { - return deserialize(source); - } - - @Override - public void copy(DataInputView source, DataOutputView target) throws IOException { - target.writeLong(source.readLong()); - target.writeInt(source.readInt()); - } - - @Override - public TypeSerializerSnapshot snapshotConfiguration() { - return new NextTransactionalIdHintSerializerSnapshot(); - } - - /** Serializer configuration snapshot for compatibility and format evolution. */ - @SuppressWarnings("WeakerAccess") - public static final class NextTransactionalIdHintSerializerSnapshot - extends SimpleTypeSerializerSnapshot { - - public NextTransactionalIdHintSerializerSnapshot() { - super(NextTransactionalIdHintSerializer::new); - } - } - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaContextAware.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaContextAware.java index d40139595..0332cf033 100644 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaContextAware.java +++ b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaContextAware.java @@ -17,7 +17,7 @@ package org.apache.flink.streaming.connectors.kafka; -import org.apache.flink.annotation.PublicEvolving; +import org.apache.flink.annotation.Internal; /** * An interface for {@link KafkaSerializationSchema KafkaSerializationSchemas} that need information @@ -26,11 +26,8 @@ * *
<p>
    You only need to override the methods for the information that you need. However, {@link * #getTargetTopic(Object)} is required because it is used to determine the available partitions. - * - * @deprecated Will be turned into internal API when {@link FlinkKafkaProducer} is removed. */ -@PublicEvolving -@Deprecated +@Internal public interface KafkaContextAware { /** diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaDeserializationSchema.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaDeserializationSchema.java index 8f15b921b..058b0301c 100644 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaDeserializationSchema.java +++ b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaDeserializationSchema.java @@ -17,7 +17,7 @@ package org.apache.flink.streaming.connectors.kafka; -import org.apache.flink.annotation.PublicEvolving; +import org.apache.flink.annotation.Internal; import org.apache.flink.api.common.serialization.DeserializationSchema; import org.apache.flink.api.java.typeutils.ResultTypeQueryable; import org.apache.flink.util.Collector; @@ -31,10 +31,8 @@ * (Java/Scala objects) that are processed by Flink. * * @param The type created by the keyed deserialization schema. - * @deprecated Will be turned into internal API when {@link FlinkKafkaConsumer} is removed. */ -@PublicEvolving -@Deprecated +@Internal public interface KafkaDeserializationSchema extends Serializable, ResultTypeQueryable { /** diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaSerializationSchema.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaSerializationSchema.java index 7ed987fce..d941b727a 100644 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaSerializationSchema.java +++ b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaSerializationSchema.java @@ -17,7 +17,7 @@ package org.apache.flink.streaming.connectors.kafka; -import org.apache.flink.annotation.PublicEvolving; +import org.apache.flink.annotation.Internal; import org.apache.flink.api.common.serialization.SerializationSchema; import org.apache.kafka.clients.producer.ProducerRecord; @@ -35,10 +35,8 @@ * which the Kafka Producer is running. * * @param the type of values being serialized - * @deprecated Will be turned into internal API when {@link FlinkKafkaProducer} is removed. */ -@PublicEvolving -@Deprecated +@Internal public interface KafkaSerializationSchema extends Serializable { /** diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/config/OffsetCommitMode.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/config/OffsetCommitMode.java deleted file mode 100644 index 6ad4f8337..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/config/OffsetCommitMode.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
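For reference, the serialization hooks retargeted to @Internal above are still implemented the same way as before; a minimal KafkaSerializationSchema that writes strings to one fixed, placeholder topic might look like the sketch below, though new code should prefer KafkaRecordSerializationSchema together with KafkaSink.

import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;

import org.apache.kafka.clients.producer.ProducerRecord;

import javax.annotation.Nullable;

import java.nio.charset.StandardCharsets;

/** Writes each String as the record value to one fixed (placeholder) topic, keeping the element timestamp. */
public class StringKafkaSerializationSchema implements KafkaSerializationSchema<String> {

    private static final long serialVersionUID = 1L;

    private final String topic;

    public StringKafkaSerializationSchema(String topic) {
        this.topic = topic;
    }

    @Override
    public ProducerRecord<byte[], byte[]> serialize(String element, @Nullable Long timestamp) {
        // null partition and key: let the configured partitioner decide where the record goes.
        return new ProducerRecord<>(topic, null, timestamp, null, element.getBytes(StandardCharsets.UTF_8));
    }
}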
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.config; - -import org.apache.flink.annotation.Internal; - -/** - * The offset commit mode represents the behaviour of how offsets are externally committed back to - * Kafka brokers / Zookeeper. - * - *
<p>
    The exact value of this is determined at runtime in the consumer subtasks. - */ -@Internal -@Deprecated -public enum OffsetCommitMode { - - /** Completely disable offset committing. */ - DISABLED, - - /** Commit offsets back to Kafka only when checkpoints are completed. */ - ON_CHECKPOINTS, - - /** - * Commit offsets periodically back to Kafka, using the auto commit functionality of internal - * Kafka clients. - */ - KAFKA_PERIODIC; -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/config/OffsetCommitModes.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/config/OffsetCommitModes.java deleted file mode 100644 index 32ac2f5f0..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/config/OffsetCommitModes.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.config; - -import org.apache.flink.annotation.Internal; - -/** Utilities for {@link OffsetCommitMode}. */ -@Internal -@Deprecated -public class OffsetCommitModes { - - /** - * Determine the offset commit mode using several configuration values. - * - * @param enableAutoCommit whether or not auto committing is enabled in the provided Kafka - * properties. - * @param enableCommitOnCheckpoint whether or not committing on checkpoints is enabled. - * @param enableCheckpointing whether or not checkpoint is enabled for the consumer. - * @return the offset commit mode to use, based on the configuration values. - */ - public static OffsetCommitMode fromConfiguration( - boolean enableAutoCommit, - boolean enableCommitOnCheckpoint, - boolean enableCheckpointing) { - - if (enableCheckpointing) { - // if checkpointing is enabled, the mode depends only on whether committing on - // checkpoints is enabled - return (enableCommitOnCheckpoint) - ? OffsetCommitMode.ON_CHECKPOINTS - : OffsetCommitMode.DISABLED; - } else { - // else, the mode depends only on whether auto committing is enabled in the provided - // Kafka properties - return (enableAutoCommit) ? OffsetCommitMode.KAFKA_PERIODIC : OffsetCommitMode.DISABLED; - } - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractFetcher.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractFetcher.java deleted file mode 100644 index 074363021..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractFetcher.java +++ /dev/null @@ -1,620 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
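The removed OffsetCommitModes helper reduces to a three-way decision; a condensed, standalone restatement of that logic (class and method names here are illustrative, not the deleted API):

public final class OffsetCommitModeDecisionSketch {

    // Illustrative names; mirrors the decision in the deleted OffsetCommitModes.fromConfiguration().
    enum Mode { DISABLED, ON_CHECKPOINTS, KAFKA_PERIODIC }

    static Mode resolve(boolean enableAutoCommit, boolean commitOnCheckpoints, boolean checkpointingEnabled) {
        if (checkpointingEnabled) {
            // With checkpointing enabled, Kafka's own auto-commit setting is ignored entirely.
            return commitOnCheckpoints ? Mode.ON_CHECKPOINTS : Mode.DISABLED;
        }
        // Without checkpointing, only Kafka's periodic auto-commit can do the committing.
        return enableAutoCommit ? Mode.KAFKA_PERIODIC : Mode.DISABLED;
    }

    public static void main(String[] args) {
        System.out.println(resolve(true, true, true));    // ON_CHECKPOINTS
        System.out.println(resolve(true, false, false));  // KAFKA_PERIODIC
        System.out.println(resolve(false, true, false));  // DISABLED
    }
}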
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.api.common.eventtime.WatermarkOutput; -import org.apache.flink.api.common.eventtime.WatermarkOutputMultiplexer; -import org.apache.flink.api.common.eventtime.WatermarkStrategy; -import org.apache.flink.api.common.operators.ProcessingTimeService.ProcessingTimeCallback; -import org.apache.flink.metrics.Gauge; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext; -import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; -import org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaConsumerMetricConstants; -import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; -import org.apache.flink.util.SerializedValue; - -import javax.annotation.Nonnull; - -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Queue; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.stream.Collectors; - -import static org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaConsumerMetricConstants.COMMITTED_OFFSETS_METRICS_GAUGE; -import static org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaConsumerMetricConstants.CURRENT_OFFSETS_METRICS_GAUGE; -import static org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaConsumerMetricConstants.LEGACY_COMMITTED_OFFSETS_METRICS_GROUP; -import static org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaConsumerMetricConstants.LEGACY_CURRENT_OFFSETS_METRICS_GROUP; -import static org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaConsumerMetricConstants.OFFSETS_BY_PARTITION_METRICS_GROUP; -import static org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaConsumerMetricConstants.OFFSETS_BY_TOPIC_METRICS_GROUP; -import static org.apache.flink.util.Preconditions.checkNotNull; - -/** - * Base class for all fetchers, which implement the connections to Kafka brokers and pull records - * from Kafka partitions. - * - *
<p>
    This fetcher base class implements the logic around emitting records and tracking offsets, as - * well as around the optional timestamp assignment and watermark generation. - * - * @param The type of elements deserialized from Kafka's byte records, and emitted into the - * Flink data streams. - * @param The type of topic/partition identifier used by Kafka in the specific version. - */ -@Internal -@Deprecated -public abstract class AbstractFetcher { - - private static final int NO_TIMESTAMPS_WATERMARKS = 0; - private static final int WITH_WATERMARK_GENERATOR = 1; - - // ------------------------------------------------------------------------ - - /** The source context to emit records and watermarks to. */ - protected final SourceContext sourceContext; - - /** - * Wrapper around our SourceContext for allowing the {@link - * org.apache.flink.api.common.eventtime.WatermarkGenerator} to emit watermarks and mark - * idleness. - */ - protected final WatermarkOutput watermarkOutput; - - /** {@link WatermarkOutputMultiplexer} for supporting per-partition watermark generation. */ - private final WatermarkOutputMultiplexer watermarkOutputMultiplexer; - - /** - * The lock that guarantees that record emission and state updates are atomic, from the view of - * taking a checkpoint. - */ - protected final Object checkpointLock; - - /** All partitions (and their state) that this fetcher is subscribed to. */ - private final List> subscribedPartitionStates; - - /** - * Queue of partitions that are not yet assigned to any Kafka clients for consuming. Kafka - * version-specific implementations of {@link AbstractFetcher#runFetchLoop()} should - * continuously poll this queue for unassigned partitions, and start consuming them accordingly. - * - *
<p>
    All partitions added to this queue are guaranteed to have been added to {@link - * #subscribedPartitionStates} already. - */ - protected final ClosableBlockingQueue> - unassignedPartitionsQueue; - - /** The mode describing whether the fetcher also generates timestamps and watermarks. */ - private final int timestampWatermarkMode; - - /** - * Optional watermark strategy that will be run per Kafka partition, to exploit per-partition - * timestamp characteristics. The watermark strategy is kept in serialized form, to deserialize - * it into multiple copies. - */ - private final SerializedValue> watermarkStrategy; - - /** User class loader used to deserialize watermark assigners. */ - private final ClassLoader userCodeClassLoader; - - // ------------------------------------------------------------------------ - // Metrics - // ------------------------------------------------------------------------ - - /** - * Flag indicating whether or not metrics should be exposed. If {@code true}, offset metrics - * (e.g. current offset, committed offset) and Kafka-shipped metrics will be registered. - */ - private final boolean useMetrics; - - /** - * The metric group which all metrics for the consumer should be registered to. This metric - * group is defined under the user scope {@link - * KafkaConsumerMetricConstants#KAFKA_CONSUMER_METRICS_GROUP}. - */ - private final MetricGroup consumerMetricGroup; - - @SuppressWarnings("DeprecatedIsStillUsed") - @Deprecated - private final MetricGroup legacyCurrentOffsetsMetricGroup; - - @SuppressWarnings("DeprecatedIsStillUsed") - @Deprecated - private final MetricGroup legacyCommittedOffsetsMetricGroup; - - protected AbstractFetcher( - SourceContext sourceContext, - Map seedPartitionsWithInitialOffsets, - SerializedValue> watermarkStrategy, - ProcessingTimeService processingTimeProvider, - long autoWatermarkInterval, - ClassLoader userCodeClassLoader, - MetricGroup consumerMetricGroup, - boolean useMetrics) - throws Exception { - this.sourceContext = checkNotNull(sourceContext); - this.watermarkOutput = new SourceContextWatermarkOutputAdapter<>(sourceContext); - this.watermarkOutputMultiplexer = new WatermarkOutputMultiplexer(watermarkOutput); - this.checkpointLock = sourceContext.getCheckpointLock(); - this.userCodeClassLoader = checkNotNull(userCodeClassLoader); - - this.useMetrics = useMetrics; - this.consumerMetricGroup = checkNotNull(consumerMetricGroup); - this.legacyCurrentOffsetsMetricGroup = - consumerMetricGroup.addGroup(LEGACY_CURRENT_OFFSETS_METRICS_GROUP); - this.legacyCommittedOffsetsMetricGroup = - consumerMetricGroup.addGroup(LEGACY_COMMITTED_OFFSETS_METRICS_GROUP); - - this.watermarkStrategy = watermarkStrategy; - - if (watermarkStrategy == null) { - timestampWatermarkMode = NO_TIMESTAMPS_WATERMARKS; - } else { - timestampWatermarkMode = WITH_WATERMARK_GENERATOR; - } - - this.unassignedPartitionsQueue = new ClosableBlockingQueue<>(); - - // initialize subscribed partition states with seed partitions - this.subscribedPartitionStates = - createPartitionStateHolders( - seedPartitionsWithInitialOffsets, - timestampWatermarkMode, - watermarkStrategy, - userCodeClassLoader); - - // check that all seed partition states have a defined offset - for (KafkaTopicPartitionState partitionState : subscribedPartitionStates) { - if (!partitionState.isOffsetDefined()) { - throw new IllegalArgumentException( - "The fetcher was assigned seed partitions with undefined initial offsets."); - } - } - - // all seed partitions are not assigned yet, so should be added to 
the unassigned partitions - // queue - for (KafkaTopicPartitionState partition : subscribedPartitionStates) { - unassignedPartitionsQueue.add(partition); - } - - // register metrics for the initial seed partitions - if (useMetrics) { - registerOffsetMetrics(consumerMetricGroup, subscribedPartitionStates); - } - - // if we have periodic watermarks, kick off the interval scheduler - if (timestampWatermarkMode == WITH_WATERMARK_GENERATOR && autoWatermarkInterval > 0) { - PeriodicWatermarkEmitter periodicEmitter = - new PeriodicWatermarkEmitter<>( - checkpointLock, - subscribedPartitionStates, - watermarkOutputMultiplexer, - processingTimeProvider, - autoWatermarkInterval); - - periodicEmitter.start(); - } - } - - /** - * Adds a list of newly discovered partitions to the fetcher for consuming. - * - *
<p>
    This method creates the partition state holder for each new partition, using {@link - * KafkaTopicPartitionStateSentinel#EARLIEST_OFFSET} as the starting offset. It uses the - * earliest offset because there may be delay in discovering a partition after it was created - * and started receiving records. - * - *
<p>
    After the state representation for a partition is created, it is added to the unassigned - * partitions queue to await to be consumed. - * - * @param newPartitions discovered partitions to add - */ - public void addDiscoveredPartitions(List newPartitions) - throws IOException, ClassNotFoundException { - List> newPartitionStates = - createPartitionStateHolders( - newPartitions, - KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET, - timestampWatermarkMode, - watermarkStrategy, - userCodeClassLoader); - - if (useMetrics) { - registerOffsetMetrics(consumerMetricGroup, newPartitionStates); - } - - for (KafkaTopicPartitionState newPartitionState : newPartitionStates) { - // the ordering is crucial here; first register the state holder, then - // push it to the partitions queue to be read - subscribedPartitionStates.add(newPartitionState); - unassignedPartitionsQueue.add(newPartitionState); - } - } - - // ------------------------------------------------------------------------ - // Properties - // ------------------------------------------------------------------------ - - /** - * Gets all partitions (with partition state) that this fetcher is subscribed to. - * - * @return All subscribed partitions. - */ - protected final List> subscribedPartitionStates() { - return subscribedPartitionStates; - } - - // ------------------------------------------------------------------------ - // Core fetcher work methods - // ------------------------------------------------------------------------ - - public abstract void runFetchLoop() throws Exception; - - public abstract void cancel(); - - // ------------------------------------------------------------------------ - // Kafka version specifics - // ------------------------------------------------------------------------ - - /** - * Commits the given partition offsets to the Kafka brokers (or to ZooKeeper for older Kafka - * versions). This method is only ever called when the offset commit mode of the consumer is - * {@link OffsetCommitMode#ON_CHECKPOINTS}. - * - *
<p>
    The given offsets are the internal checkpointed offsets, representing the last processed - * record of each partition. Version-specific implementations of this method need to hold the - * contract that the given offsets must be incremented by 1 before committing them, so that - * committed offsets to Kafka represent "the next record to process". - * - * @param offsets The offsets to commit to Kafka (implementations must increment offsets by 1 - * before committing). - * @param commitCallback The callback that the user should trigger when a commit request - * completes or fails. - * @throws Exception This method forwards exceptions. - */ - public final void commitInternalOffsetsToKafka( - Map offsets, @Nonnull KafkaCommitCallback commitCallback) - throws Exception { - // Ignore sentinels. They might appear here if snapshot has started before actual offsets - // values - // replaced sentinels - doCommitInternalOffsetsToKafka(filterOutSentinels(offsets), commitCallback); - } - - protected abstract void doCommitInternalOffsetsToKafka( - Map offsets, @Nonnull KafkaCommitCallback commitCallback) - throws Exception; - - private Map filterOutSentinels( - Map offsets) { - return offsets.entrySet().stream() - .filter(entry -> !KafkaTopicPartitionStateSentinel.isSentinel(entry.getValue())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - } - - /** - * Creates the Kafka version specific representation of the given topic partition. - * - * @param partition The Flink representation of the Kafka topic partition. - * @return The version-specific Kafka representation of the Kafka topic partition. - */ - protected abstract KPH createKafkaPartitionHandle(KafkaTopicPartition partition); - - // ------------------------------------------------------------------------ - // snapshot and restore the state - // ------------------------------------------------------------------------ - - /** - * Takes a snapshot of the partition offsets. - * - *
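The contract spelled out above, that committed offsets must be the checkpointed offset plus one ("the next record to process"), is easy to get wrong. A minimal sketch of that convention against the plain Kafka consumer API, assuming the caller tracks the last processed offset per partition (helper names are illustrative):

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

import java.util.HashMap;
import java.util.Map;

public final class CommitNextOffsetSketch {

    /** Turns "last processed offset per partition" into what Kafka expects: the next offset to read. */
    static Map<TopicPartition, OffsetAndMetadata> toCommitMap(Map<TopicPartition, Long> lastProcessed) {
        Map<TopicPartition, OffsetAndMetadata> commitMap = new HashMap<>();
        for (Map.Entry<TopicPartition, Long> entry : lastProcessed.entrySet()) {
            // +1: the committed offset must point at the next record to process.
            commitMap.put(entry.getKey(), new OffsetAndMetadata(entry.getValue() + 1));
        }
        return commitMap;
    }

    static void commit(KafkaConsumer<?, ?> consumer, Map<TopicPartition, Long> lastProcessed) {
        consumer.commitSync(toCommitMap(lastProcessed));
    }
}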
<p>
    Important: This method must be called under the checkpoint lock. - * - * @return A map from partition to current offset. - */ - public HashMap snapshotCurrentState() { - // this method assumes that the checkpoint lock is held - assert Thread.holdsLock(checkpointLock); - - HashMap state = new HashMap<>(subscribedPartitionStates.size()); - for (KafkaTopicPartitionState partition : subscribedPartitionStates) { - state.put(partition.getKafkaTopicPartition(), partition.getOffset()); - } - return state; - } - - // ------------------------------------------------------------------------ - // emitting records - // ------------------------------------------------------------------------ - - /** - * Emits a record attaching a timestamp to it. - * - * @param records The records to emit - * @param partitionState The state of the Kafka partition from which the record was fetched - * @param offset The offset of the corresponding Kafka record - * @param kafkaEventTimestamp The timestamp of the Kafka record - */ - protected void emitRecordsWithTimestamps( - Queue records, - KafkaTopicPartitionState partitionState, - long offset, - long kafkaEventTimestamp) { - // emit the records, using the checkpoint lock to guarantee - // atomicity of record emission and offset state update - synchronized (checkpointLock) { - T record; - while ((record = records.poll()) != null) { - long timestamp = partitionState.extractTimestamp(record, kafkaEventTimestamp); - sourceContext.collectWithTimestamp(record, timestamp); - - // this might emit a watermark, so do it after emitting the record - partitionState.onEvent(record, timestamp); - } - partitionState.setOffset(offset); - } - } - - // ------------------------------------------------------------------------ - // Utilities - // ------------------------------------------------------------------------ - - /** - * Utility method that takes the topic partitions and creates the topic partition state holders, - * depending on the timestamp / watermark mode. 
- */ - private List> createPartitionStateHolders( - Map partitionsToInitialOffsets, - int timestampWatermarkMode, - SerializedValue> watermarkStrategy, - ClassLoader userCodeClassLoader) - throws IOException, ClassNotFoundException { - - // CopyOnWrite as adding discovered partitions could happen in parallel - // while different threads iterate the partitions list - List> partitionStates = new CopyOnWriteArrayList<>(); - - switch (timestampWatermarkMode) { - case NO_TIMESTAMPS_WATERMARKS: - { - for (Map.Entry partitionEntry : - partitionsToInitialOffsets.entrySet()) { - // create the kafka version specific partition handle - KPH kafkaHandle = createKafkaPartitionHandle(partitionEntry.getKey()); - - KafkaTopicPartitionState partitionState = - new KafkaTopicPartitionState<>( - partitionEntry.getKey(), kafkaHandle); - partitionState.setOffset(partitionEntry.getValue()); - - partitionStates.add(partitionState); - } - - return partitionStates; - } - - case WITH_WATERMARK_GENERATOR: - { - for (Map.Entry partitionEntry : - partitionsToInitialOffsets.entrySet()) { - final KafkaTopicPartition kafkaTopicPartition = partitionEntry.getKey(); - KPH kafkaHandle = createKafkaPartitionHandle(kafkaTopicPartition); - WatermarkStrategy deserializedWatermarkStrategy = - watermarkStrategy.deserializeValue(userCodeClassLoader); - - // the format of the ID does not matter, as long as it is unique - final String partitionId = - kafkaTopicPartition.getTopic() - + '-' - + kafkaTopicPartition.getPartition(); - watermarkOutputMultiplexer.registerNewOutput(partitionId, watermark -> {}); - WatermarkOutput immediateOutput = - watermarkOutputMultiplexer.getImmediateOutput(partitionId); - WatermarkOutput deferredOutput = - watermarkOutputMultiplexer.getDeferredOutput(partitionId); - - KafkaTopicPartitionStateWithWatermarkGenerator partitionState = - new KafkaTopicPartitionStateWithWatermarkGenerator<>( - partitionEntry.getKey(), - kafkaHandle, - deserializedWatermarkStrategy.createTimestampAssigner( - () -> consumerMetricGroup), - // When upgrading to Flink 2.0, context has to provide also - // the input activity clock. This is not trivial for the old - // sources. Ideally we should drop this old source before - // this connector is upgraded to Flink 2.0. Otherwise, we - // can avoid the compilation error without fixing the bug - // addressed by the FLIP-471, by returning SystemClock, - // which would reproduce the pre-FLIP-471 behavior (without - // fixing the underlying bug). - deserializedWatermarkStrategy.createWatermarkGenerator( - () -> consumerMetricGroup), - immediateOutput, - deferredOutput); - - partitionState.setOffset(partitionEntry.getValue()); - - partitionStates.add(partitionState); - } - - return partitionStates; - } - - default: - // cannot happen, add this as a guard for the future - throw new RuntimeException(); - } - } - - /** - * Shortcut variant of {@link #createPartitionStateHolders(Map, int, SerializedValue, - * ClassLoader)} that uses the same offset for all partitions when creating their state holders. 
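The WITH_WATERMARK_GENERATOR branch above registers one output per partition with a WatermarkOutputMultiplexer, so the watermark forwarded downstream is the minimum across partitions. A condensed standalone sketch of that mechanism, assuming the three-method WatermarkOutput interface of recent Flink releases and illustrative partition ids and timestamps:

import org.apache.flink.api.common.eventtime.Watermark;
import org.apache.flink.api.common.eventtime.WatermarkOutput;
import org.apache.flink.api.common.eventtime.WatermarkOutputMultiplexer;

public final class PerPartitionWatermarkSketch {

    public static void main(String[] args) {
        // Underlying output: simply prints what the multiplexer decides to forward.
        WatermarkOutput combined = new WatermarkOutput() {
            @Override
            public void emitWatermark(Watermark watermark) {
                System.out.println("combined watermark: " + watermark.getTimestamp());
            }

            @Override
            public void markIdle() {
                System.out.println("all outputs idle");
            }

            @Override
            public void markActive() {
                System.out.println("active again");
            }
        };

        WatermarkOutputMultiplexer multiplexer = new WatermarkOutputMultiplexer(combined);
        // Illustrative per-partition ids; the format does not matter as long as ids are unique.
        multiplexer.registerNewOutput("demo-topic-0", watermark -> {});
        multiplexer.registerNewOutput("demo-topic-1", watermark -> {});

        // Deferred per-partition updates are only combined on the next periodic emit,
        // mirroring how the deleted PeriodicWatermarkEmitter drives emission.
        multiplexer.getDeferredOutput("demo-topic-0").emitWatermark(new Watermark(100L));
        multiplexer.getDeferredOutput("demo-topic-1").emitWatermark(new Watermark(42L));

        // Forwards min(100, 42) = 42 to the underlying output.
        multiplexer.onPeriodicEmit();
    }
}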
- */ - private List> createPartitionStateHolders( - List partitions, - long initialOffset, - int timestampWatermarkMode, - SerializedValue> watermarkStrategy, - ClassLoader userCodeClassLoader) - throws IOException, ClassNotFoundException { - - Map partitionsToInitialOffset = new HashMap<>(partitions.size()); - for (KafkaTopicPartition partition : partitions) { - partitionsToInitialOffset.put(partition, initialOffset); - } - - return createPartitionStateHolders( - partitionsToInitialOffset, - timestampWatermarkMode, - watermarkStrategy, - userCodeClassLoader); - } - - // ------------------------- Metrics ---------------------------------- - - /** - * For each partition, register a new metric group to expose current offsets and committed - * offsets. Per-partition metric groups can be scoped by user variables {@link - * KafkaConsumerMetricConstants#OFFSETS_BY_TOPIC_METRICS_GROUP} and {@link - * KafkaConsumerMetricConstants#OFFSETS_BY_PARTITION_METRICS_GROUP}. - * - *
<p>
    Note: this method also registers gauges for deprecated offset metrics, to maintain - * backwards compatibility. - * - * @param consumerMetricGroup The consumer metric group - * @param partitionOffsetStates The partition offset state holders, whose values will be used to - * update metrics - */ - private void registerOffsetMetrics( - MetricGroup consumerMetricGroup, - List> partitionOffsetStates) { - - for (KafkaTopicPartitionState ktp : partitionOffsetStates) { - MetricGroup topicPartitionGroup = - consumerMetricGroup - .addGroup(OFFSETS_BY_TOPIC_METRICS_GROUP, ktp.getTopic()) - .addGroup( - OFFSETS_BY_PARTITION_METRICS_GROUP, - Integer.toString(ktp.getPartition())); - - topicPartitionGroup.gauge( - CURRENT_OFFSETS_METRICS_GAUGE, - new OffsetGauge(ktp, OffsetGaugeType.CURRENT_OFFSET)); - topicPartitionGroup.gauge( - COMMITTED_OFFSETS_METRICS_GAUGE, - new OffsetGauge(ktp, OffsetGaugeType.COMMITTED_OFFSET)); - - legacyCurrentOffsetsMetricGroup.gauge( - getLegacyOffsetsMetricsGaugeName(ktp), - new OffsetGauge(ktp, OffsetGaugeType.CURRENT_OFFSET)); - legacyCommittedOffsetsMetricGroup.gauge( - getLegacyOffsetsMetricsGaugeName(ktp), - new OffsetGauge(ktp, OffsetGaugeType.COMMITTED_OFFSET)); - } - } - - private static String getLegacyOffsetsMetricsGaugeName(KafkaTopicPartitionState ktp) { - return ktp.getTopic() + "-" + ktp.getPartition(); - } - - /** Gauge types. */ - private enum OffsetGaugeType { - CURRENT_OFFSET, - COMMITTED_OFFSET - } - - /** Gauge for getting the offset of a KafkaTopicPartitionState. */ - private static class OffsetGauge implements Gauge { - - private final KafkaTopicPartitionState ktp; - private final OffsetGaugeType gaugeType; - - OffsetGauge(KafkaTopicPartitionState ktp, OffsetGaugeType gaugeType) { - this.ktp = ktp; - this.gaugeType = gaugeType; - } - - @Override - public Long getValue() { - switch (gaugeType) { - case COMMITTED_OFFSET: - return ktp.getCommittedOffset(); - case CURRENT_OFFSET: - return ktp.getOffset(); - default: - throw new RuntimeException("Unknown gauge type: " + gaugeType); - } - } - } - // ------------------------------------------------------------------------ - - /** - * The periodic watermark emitter. In its given interval, it checks all partitions for the - * current event time watermark, and possibly emits the next watermark. 
- */ - private static class PeriodicWatermarkEmitter implements ProcessingTimeCallback { - - private final Object checkpointLock; - - private final List> allPartitions; - - private final WatermarkOutputMultiplexer watermarkOutputMultiplexer; - - private final ProcessingTimeService timerService; - - private final long interval; - - // ------------------------------------------------- - - PeriodicWatermarkEmitter( - Object checkpointLock, - List> allPartitions, - WatermarkOutputMultiplexer watermarkOutputMultiplexer, - ProcessingTimeService timerService, - long autoWatermarkInterval) { - this.checkpointLock = checkpointLock; - this.allPartitions = checkNotNull(allPartitions); - this.watermarkOutputMultiplexer = watermarkOutputMultiplexer; - this.timerService = checkNotNull(timerService); - this.interval = autoWatermarkInterval; - } - - // ------------------------------------------------- - - public void start() { - timerService.registerTimer(timerService.getCurrentProcessingTime() + interval, this); - } - - @Override - public void onProcessingTime(long timestamp) { - - synchronized (checkpointLock) { - for (KafkaTopicPartitionState state : allPartitions) { - state.onPeriodicEmit(); - } - - watermarkOutputMultiplexer.onPeriodicEmit(); - } - - // schedule the next watermark - timerService.registerTimer(timerService.getCurrentProcessingTime() + interval, this); - } - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractPartitionDiscoverer.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractPartitionDiscoverer.java deleted file mode 100644 index c8dc18360..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractPartitionDiscoverer.java +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; - -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import static org.apache.flink.util.Preconditions.checkNotNull; - -/** - * Base class for all partition discoverers. - * - *

    This partition discoverer base class implements the logic around bookkeeping discovered - * partitions, and using the information to determine whether or not there are new partitions that - * the consumer subtask should subscribe to. - * - *

    Subclass implementations should simply implement the logic of using the version-specific Kafka - * clients to fetch topic and partition metadata. - * - *
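For illustration only, not part of the patch: a minimal sketch of what such a subclass has to provide, backed by a fixed in-memory topic layout instead of a real Kafka client. It assumes the removed AbstractPartitionDiscoverer, KafkaTopicPartition and KafkaTopicsDescriptor classes are still on the classpath and that the sketch lives in the same package; the topic names and the partition count of 4 are made up.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Sketch of a discoverer backed by a fixed, in-memory topic layout instead of a real Kafka client.
public class StaticPartitionDiscoverer extends AbstractPartitionDiscoverer {

    public StaticPartitionDiscoverer(
            KafkaTopicsDescriptor topicsDescriptor,
            int indexOfThisSubtask,
            int numParallelSubtasks) {
        super(topicsDescriptor, indexOfThisSubtask, numParallelSubtasks);
    }

    @Override
    protected void initializeConnections() {
        // a real implementation would create its Kafka client here
    }

    @Override
    protected void wakeupConnections() {
        // a real implementation would interrupt a blocking metadata call here
    }

    @Override
    protected void closeConnections() {
        // a real implementation would close its Kafka client here
    }

    @Override
    protected List<String> getAllTopics() {
        return Arrays.asList("topic-a", "topic-b");
    }

    @Override
    protected List<KafkaTopicPartition> getAllPartitionsForTopics(List<String> topics) {
        List<KafkaTopicPartition> partitions = new ArrayList<>();
        for (String topic : topics) {
            for (int partition = 0; partition < 4; partition++) {
                partitions.add(new KafkaTopicPartition(topic, partition));
            }
        }
        return partitions;
    }
}

A consumer subtask would call open() once and then discoverPartitions() periodically; the base class filters out partitions that were already seen or that belong to other subtasks.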

    Since Kafka clients are generally not thread-safe, partition discoverers should not be - * concurrently accessed. The only exception for this would be the {@link #wakeup()} call, which - * allows the discoverer to be interrupted during a {@link #discoverPartitions()} call. - */ -@Internal -@Deprecated -public abstract class AbstractPartitionDiscoverer { - - /** Describes whether we are discovering partitions for fixed topics or a topic pattern. */ - private final KafkaTopicsDescriptor topicsDescriptor; - - /** Index of the consumer subtask that this partition discoverer belongs to. */ - private final int indexOfThisSubtask; - - /** The total number of consumer subtasks. */ - private final int numParallelSubtasks; - - /** Flag to determine whether or not the discoverer is closed. */ - private volatile boolean closed = true; - - /** - * Flag to determine whether or not the discoverer had been woken up. When set to {@code true}, - * {@link #discoverPartitions()} would be interrupted as early as possible. Once interrupted, - * the flag is reset. - */ - private volatile boolean wakeup; - - /** - * Map of topics to they're largest discovered partition id seen by this subtask. This state may - * be updated whenever {@link AbstractPartitionDiscoverer#discoverPartitions()} or {@link - * AbstractPartitionDiscoverer#setAndCheckDiscoveredPartition(KafkaTopicPartition)} is called. - * - *

This is used to remove old partitions from the fetched partition lists. It is sufficient - * to keep track of only the largest partition id because Kafka partition numbers are only - * allowed to increase and have incremental ids. - */ - private Set<KafkaTopicPartition> discoveredPartitions; - - public AbstractPartitionDiscoverer( - KafkaTopicsDescriptor topicsDescriptor, - int indexOfThisSubtask, - int numParallelSubtasks) { - - this.topicsDescriptor = checkNotNull(topicsDescriptor); - this.indexOfThisSubtask = indexOfThisSubtask; - this.numParallelSubtasks = numParallelSubtasks; - this.discoveredPartitions = new HashSet<>(); - } - - /** - * Opens the partition discoverer, initializing all required Kafka connections. - * - *

    NOTE: thread-safety is not guaranteed. - */ - public void open() throws Exception { - closed = false; - initializeConnections(); - } - - /** - * Closes the partition discoverer, cleaning up all Kafka connections. - * - *

    NOTE: thread-safety is not guaranteed. - */ - public void close() throws Exception { - closed = true; - closeConnections(); - } - - /** - * Interrupt an in-progress discovery attempt by throwing a {@link WakeupException}. If no - * attempt is in progress, the immediate next attempt will throw a {@link WakeupException}. - * - *

    This method can be called concurrently from a different thread. - */ - public void wakeup() { - wakeup = true; - wakeupConnections(); - } - - /** - * Execute a partition discovery attempt for this subtask. This method lets the partition - * discoverer update what partitions it has discovered so far. - * - * @return List of discovered new partitions that this subtask should subscribe to. - */ - public List discoverPartitions() throws WakeupException, ClosedException { - if (!closed && !wakeup) { - try { - List newDiscoveredPartitions; - - // (1) get all possible partitions, based on whether we are subscribed to fixed - // topics or a topic pattern - if (topicsDescriptor.isFixedTopics()) { - newDiscoveredPartitions = - new ArrayList<>( - getAllPartitionsForTopics(topicsDescriptor.getFixedTopics())); - } else { - List matchedTopics = new ArrayList<>(getAllTopics()); - - // retain topics that match the pattern - matchedTopics.removeIf(s -> !topicsDescriptor.isMatchingTopic(s)); - - if (!matchedTopics.isEmpty()) { - // get partitions only for matched topics - newDiscoveredPartitions = - new ArrayList<>(getAllPartitionsForTopics(matchedTopics)); - } else { - newDiscoveredPartitions = null; - } - } - - // (2) eliminate partition that are old partitions or should not be subscribed by - // this subtask - if (newDiscoveredPartitions == null || newDiscoveredPartitions.isEmpty()) { - throw new RuntimeException( - "Unable to retrieve any partitions with KafkaTopicsDescriptor: " - + topicsDescriptor); - } else { - newDiscoveredPartitions.removeIf( - nextPartition -> !setAndCheckDiscoveredPartition(nextPartition)); - } - - return newDiscoveredPartitions; - } catch (WakeupException e) { - // the actual topic / partition metadata fetching methods - // may be woken up midway; reset the wakeup flag and rethrow - wakeup = false; - throw e; - } - } else if (!closed && wakeup) { - // may have been woken up before the method call - wakeup = false; - throw new WakeupException(); - } else { - throw new ClosedException(); - } - } - - /** - * Sets a partition as discovered. Partitions are considered as new if its partition id is - * larger than all partition ids previously seen for the topic it belongs to. Therefore, for a - * set of discovered partitions, the order that this method is invoked with each partition is - * important. - * - *

    If the partition is indeed newly discovered, this method also returns whether the new - * partition should be subscribed by this subtask. - * - * @param partition the partition to set and check - * @return {@code true}, if the partition wasn't seen before and should be subscribed by this - * subtask; {@code false} otherwise - */ - public boolean setAndCheckDiscoveredPartition(KafkaTopicPartition partition) { - if (isUndiscoveredPartition(partition)) { - discoveredPartitions.add(partition); - - return KafkaTopicPartitionAssigner.assign(partition, numParallelSubtasks) - == indexOfThisSubtask; - } - - return false; - } - - // ------------------------------------------------------------------------ - // Kafka version specifics - // ------------------------------------------------------------------------ - - /** Establish the required connections in order to fetch topics and partitions metadata. */ - protected abstract void initializeConnections() throws Exception; - - /** - * Attempt to eagerly wakeup from blocking calls to Kafka in {@link - * AbstractPartitionDiscoverer#getAllTopics()} and {@link - * AbstractPartitionDiscoverer#getAllPartitionsForTopics(List)}. - * - *

    If the invocation indeed results in interrupting an actual blocking Kafka call, the - * implementations of {@link AbstractPartitionDiscoverer#getAllTopics()} and {@link - * AbstractPartitionDiscoverer#getAllPartitionsForTopics(List)} are responsible of throwing a - * {@link WakeupException}. - */ - protected abstract void wakeupConnections(); - - /** Close all established connections. */ - protected abstract void closeConnections() throws Exception; - - /** Fetch the list of all topics from Kafka. */ - protected abstract List getAllTopics() throws WakeupException; - - /** Fetch the list of all partitions for a specific topics list from Kafka. */ - protected abstract List getAllPartitionsForTopics(List topics) - throws WakeupException; - - // ------------------------------------------------------------------------ - // Utilities - // ------------------------------------------------------------------------ - - /** Signaling exception to indicate that an actual Kafka call was interrupted. */ - public static final class WakeupException extends Exception { - private static final long serialVersionUID = 1L; - } - - /** Thrown if this discoverer was used to discover partitions after it was closed. */ - public static final class ClosedException extends Exception { - private static final long serialVersionUID = 1L; - } - - private boolean isUndiscoveredPartition(KafkaTopicPartition partition) { - return !discoveredPartitions.contains(partition); - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/ClosableBlockingQueue.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/ClosableBlockingQueue.java deleted file mode 100644 index 3b1751d40..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/ClosableBlockingQueue.java +++ /dev/null @@ -1,501 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; - -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.Condition; -import java.util.concurrent.locks.ReentrantLock; - -import static java.util.Objects.requireNonNull; - -/** - * A special form of blocking queue with two additions: - * - *

      - *
    1. The queue can be closed atomically when empty. Adding elements after the queue is closed - * fails. This allows queue consumers to atomically discover that no elements are available - * and mark themselves as shut down. - *
2. The queue allows polling batches of elements in one polling call (a shutdown handshake built on these two properties is sketched after this list). - *
    - * - *
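For illustration only, not part of the patch: a minimal sketch of the shutdown handshake that the two properties above enable, assuming the removed ClosableBlockingQueue class is still on the classpath; the String element type and the 100 ms poll timeout are arbitrary choices.

import java.util.List;

public class QueueShutdownSketch {

    public static void main(String[] args) throws Exception {
        final ClosableBlockingQueue<String> queue = new ClosableBlockingQueue<>();

        Thread producer = new Thread(() -> {
            for (int i = 0; i < 1_000; i++) {
                // addIfOpen() atomically checks the open flag and enqueues; it returns
                // false once the consumer has closed the queue, so the producer stops.
                if (!queue.addIfOpen("record-" + i)) {
                    return;
                }
            }
        });
        producer.start();

        int consumed = 0;
        while (true) {
            // Wait up to 100 ms for a batch; an empty list means the timeout expired.
            List<String> batch = queue.getBatchBlocking(100);
            if (!batch.isEmpty()) {
                consumed += batch.size();
            } else if (queue.close()) {
                // The queue was empty and is now closed in one atomic step, so no element
                // that was actually enqueued can be silently dropped.
                break;
            }
        }

        producer.join();
        System.out.println("Consumed " + consumed + " records before closing the queue.");
    }
}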

    The queue has no capacity restriction and is safe for multiple producers and consumers. - * - *

    Note: Null elements are prohibited. - * - * @param The type of elements in the queue. - */ -@Internal -@Deprecated -public class ClosableBlockingQueue { - - /** The lock used to make queue accesses and open checks atomic. */ - private final ReentrantLock lock; - - /** The condition on which blocking get-calls wait if the queue is empty. */ - private final Condition nonEmpty; - - /** The deque of elements. */ - private final ArrayDeque elements; - - /** Flag marking the status of the queue. */ - private volatile boolean open; - - // ------------------------------------------------------------------------ - - /** Creates a new empty queue. */ - public ClosableBlockingQueue() { - this(10); - } - - /** - * Creates a new empty queue, reserving space for at least the specified number of elements. The - * queue can still grow, of more elements are added than the reserved space. - * - * @param initialSize The number of elements to reserve space for. - */ - public ClosableBlockingQueue(int initialSize) { - this.lock = new ReentrantLock(true); - this.nonEmpty = this.lock.newCondition(); - - this.elements = new ArrayDeque<>(initialSize); - this.open = true; - } - - /** - * Creates a new queue that contains the given elements. - * - * @param initialElements The elements to initially add to the queue. - */ - public ClosableBlockingQueue(Collection initialElements) { - this(initialElements.size()); - this.elements.addAll(initialElements); - } - - // ------------------------------------------------------------------------ - // Size and status - // ------------------------------------------------------------------------ - - /** - * Gets the number of elements currently in the queue. - * - * @return The number of elements currently in the queue. - */ - public int size() { - lock.lock(); - try { - return elements.size(); - } finally { - lock.unlock(); - } - } - - /** - * Checks whether the queue is empty (has no elements). - * - * @return True, if the queue is empty; false, if it is non-empty. - */ - public boolean isEmpty() { - return size() == 0; - } - - /** - * Checks whether the queue is currently open, meaning elements can be added and polled. - * - * @return True, if the queue is open; false, if it is closed. - */ - public boolean isOpen() { - return open; - } - - /** - * Tries to close the queue. Closing the queue only succeeds when no elements are in the queue - * when this method is called. Checking whether the queue is empty, and marking the queue as - * closed is one atomic operation. - * - * @return True, if the queue is closed, false if the queue remains open. - */ - public boolean close() { - lock.lock(); - try { - if (open) { - if (elements.isEmpty()) { - open = false; - nonEmpty.signalAll(); - return true; - } else { - return false; - } - } else { - // already closed - return true; - } - } finally { - lock.unlock(); - } - } - - // ------------------------------------------------------------------------ - // Adding / Removing elements - // ------------------------------------------------------------------------ - - /** - * Tries to add an element to the queue, if the queue is still open. Checking whether the queue - * is open and adding the element is one atomic operation. - * - *

    Unlike the {@link #add(Object)} method, this method never throws an exception, but only - * indicates via the return code if the element was added or the queue was closed. - * - * @param element The element to add. - * @return True, if the element was added, false if the queue was closes. - */ - public boolean addIfOpen(E element) { - requireNonNull(element); - - lock.lock(); - try { - if (open) { - elements.addLast(element); - if (elements.size() == 1) { - nonEmpty.signalAll(); - } - } - return open; - } finally { - lock.unlock(); - } - } - - /** - * Adds the element to the queue, or fails with an exception, if the queue is closed. Checking - * whether the queue is open and adding the element is one atomic operation. - * - * @param element The element to add. - * @throws IllegalStateException Thrown, if the queue is closed. - */ - public void add(E element) throws IllegalStateException { - requireNonNull(element); - - lock.lock(); - try { - if (open) { - elements.addLast(element); - if (elements.size() == 1) { - nonEmpty.signalAll(); - } - } else { - throw new IllegalStateException("queue is closed"); - } - } finally { - lock.unlock(); - } - } - - /** - * Returns the queue's next element without removing it, if the queue is non-empty. Otherwise, - * returns null. - * - *

    The method throws an {@code IllegalStateException} if the queue is closed. Checking - * whether the queue is open and getting the next element is one atomic operation. - * - *

This method never blocks. - * - * @return The queue's next element, or null, if the queue is empty. - * @throws IllegalStateException Thrown, if the queue is closed. - */ - public E peek() { - lock.lock(); - try { - if (open) { - if (elements.size() > 0) { - return elements.getFirst(); - } else { - return null; - } - } else { - throw new IllegalStateException("queue is closed"); - } - } finally { - lock.unlock(); - } - } - - /** - * Returns the queue's next element and removes it, if the queue is non-empty. Otherwise, this - * method returns null. - * - *

    The method throws an {@code IllegalStateException} if the queue is closed. Checking - * whether the queue is open and removing the next element is one atomic operation. - * - *

    This method never blocks. - * - * @return The queue's next element, or null, if the queue is empty. - * @throws IllegalStateException Thrown, if the queue is closed. - */ - public E poll() { - lock.lock(); - try { - if (open) { - if (elements.size() > 0) { - return elements.removeFirst(); - } else { - return null; - } - } else { - throw new IllegalStateException("queue is closed"); - } - } finally { - lock.unlock(); - } - } - - /** - * Returns all of the queue's current elements in a list, if the queue is non-empty. Otherwise, - * this method returns null. - * - *

    The method throws an {@code IllegalStateException} if the queue is closed. Checking - * whether the queue is open and removing the elements is one atomic operation. - * - *

    This method never blocks. - * - * @return All of the queue's elements, or null, if the queue is empty. - * @throws IllegalStateException Thrown, if the queue is closed. - */ - public List pollBatch() { - lock.lock(); - try { - if (open) { - if (elements.size() > 0) { - ArrayList result = new ArrayList<>(elements); - elements.clear(); - return result; - } else { - return null; - } - } else { - throw new IllegalStateException("queue is closed"); - } - } finally { - lock.unlock(); - } - } - - /** - * Returns the next element in the queue. If the queue is empty, this method waits until at - * least one element is added. - * - *

    The method throws an {@code IllegalStateException} if the queue is closed. Checking - * whether the queue is open and removing the next element is one atomic operation. - * - * @return The next element in the queue, never null. - * @throws IllegalStateException Thrown, if the queue is closed. - * @throws InterruptedException Throw, if the thread is interrupted while waiting for an element - * to be added. - */ - public E getElementBlocking() throws InterruptedException { - lock.lock(); - try { - while (open && elements.isEmpty()) { - nonEmpty.await(); - } - - if (open) { - return elements.removeFirst(); - } else { - throw new IllegalStateException("queue is closed"); - } - } finally { - lock.unlock(); - } - } - - /** - * Returns the next element in the queue. If the queue is empty, this method waits at most a - * certain time until an element becomes available. If no element is available after that time, - * the method returns null. - * - *

    The method throws an {@code IllegalStateException} if the queue is closed. Checking - * whether the queue is open and removing the next element is one atomic operation. - * - * @param timeoutMillis The number of milliseconds to block, at most. - * @return The next element in the queue, or null, if the timeout expires before an element is - * available. - * @throws IllegalStateException Thrown, if the queue is closed. - * @throws InterruptedException Throw, if the thread is interrupted while waiting for an element - * to be added. - */ - public E getElementBlocking(long timeoutMillis) throws InterruptedException { - if (timeoutMillis == 0L) { - // wait forever case - return getElementBlocking(); - } else if (timeoutMillis < 0L) { - throw new IllegalArgumentException("invalid timeout"); - } - - final long deadline = System.nanoTime() + timeoutMillis * 1_000_000L; - - lock.lock(); - try { - while (open && elements.isEmpty() && timeoutMillis > 0) { - nonEmpty.await(timeoutMillis, TimeUnit.MILLISECONDS); - timeoutMillis = (deadline - System.nanoTime()) / 1_000_000L; - } - - if (!open) { - throw new IllegalStateException("queue is closed"); - } else if (elements.isEmpty()) { - return null; - } else { - return elements.removeFirst(); - } - } finally { - lock.unlock(); - } - } - - /** - * Gets all the elements found in the list, or blocks until at least one element was added. If - * the queue is empty when this method is called, it blocks until at least one element is added. - * - *

    This method always returns a list with at least one element. - * - *

    The method throws an {@code IllegalStateException} if the queue is closed. Checking - * whether the queue is open and removing the next element is one atomic operation. - * - * @return A list with all elements in the queue, always at least one element. - * @throws IllegalStateException Thrown, if the queue is closed. - * @throws InterruptedException Throw, if the thread is interrupted while waiting for an element - * to be added. - */ - public List getBatchBlocking() throws InterruptedException { - lock.lock(); - try { - while (open && elements.isEmpty()) { - nonEmpty.await(); - } - if (open) { - ArrayList result = new ArrayList<>(elements); - elements.clear(); - return result; - } else { - throw new IllegalStateException("queue is closed"); - } - } finally { - lock.unlock(); - } - } - - /** - * Gets all the elements found in the list, or blocks until at least one element was added. This - * method is similar as {@link #getBatchBlocking()}, but takes a number of milliseconds that the - * method will maximally wait before returning. - * - *

    This method never returns null, but an empty list, if the queue is empty when the method - * is called and the request times out before an element was added. - * - *

    The method throws an {@code IllegalStateException} if the queue is closed. Checking - * whether the queue is open and removing the next element is one atomic operation. - * - * @param timeoutMillis The number of milliseconds to wait, at most. - * @return A list with all elements in the queue, possible an empty list. - * @throws IllegalStateException Thrown, if the queue is closed. - * @throws InterruptedException Throw, if the thread is interrupted while waiting for an element - * to be added. - */ - public List getBatchBlocking(long timeoutMillis) throws InterruptedException { - if (timeoutMillis == 0L) { - // wait forever case - return getBatchBlocking(); - } else if (timeoutMillis < 0L) { - throw new IllegalArgumentException("invalid timeout"); - } - - final long deadline = System.nanoTime() + timeoutMillis * 1_000_000L; - - lock.lock(); - try { - while (open && elements.isEmpty() && timeoutMillis > 0) { - nonEmpty.await(timeoutMillis, TimeUnit.MILLISECONDS); - timeoutMillis = (deadline - System.nanoTime()) / 1_000_000L; - } - - if (!open) { - throw new IllegalStateException("queue is closed"); - } else if (elements.isEmpty()) { - return Collections.emptyList(); - } else { - ArrayList result = new ArrayList<>(elements); - elements.clear(); - return result; - } - } finally { - lock.unlock(); - } - } - - // ------------------------------------------------------------------------ - // Standard Utilities - // ------------------------------------------------------------------------ - - @Override - public int hashCode() { - int hashCode = 17; - for (E element : elements) { - hashCode = 31 * hashCode + element.hashCode(); - } - return hashCode; - } - - @Override - public boolean equals(Object obj) { - if (obj == this) { - return true; - } else if (obj != null && obj.getClass() == ClosableBlockingQueue.class) { - @SuppressWarnings("unchecked") - ClosableBlockingQueue that = (ClosableBlockingQueue) obj; - - if (this.elements.size() == that.elements.size()) { - Iterator thisElements = this.elements.iterator(); - for (E thatNext : that.elements) { - E thisNext = thisElements.next(); - if (!(thisNext == null ? thatNext == null : thisNext.equals(thatNext))) { - return false; - } - } - return true; - } else { - return false; - } - } else { - return false; - } - } - - @Override - public String toString() { - return elements.toString(); - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/ExceptionProxy.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/ExceptionProxy.java deleted file mode 100644 index a9f9c9cae..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/ExceptionProxy.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; - -import javax.annotation.Nullable; - -import java.util.concurrent.atomic.AtomicReference; - -/** - * A proxy that communicates exceptions between threads. Typically used if an exception from a - * spawned thread needs to be recognized by the "parent" (spawner) thread. - * - *

    The spawned thread would set the exception via {@link #reportError(Throwable)}. The parent - * would check (at certain points) for exceptions via {@link #checkAndThrowException()}. Optionally, - * the parent can pass itself in the constructor to be interrupted as soon as an exception occurs. - * - *

    {@code
    - * final ExceptionProxy errorProxy = new ExceptionProxy(Thread.currentThread());
    - *
    - * Thread subThread = new Thread() {
    - *
    - *     public void run() {
    - *         try {
    - *             doSomething();
    - *         } catch (Throwable t) {
    - *             errorProxy.reportError(t);
    - *         } finally {
    - *             doSomeCleanup();
    - *         }
    - *     }
    - * };
    - * subThread.start();
    - *
    - * doSomethingElse();
    - * errorProxy.checkAndThrowException();
    - *
    - * doSomethingMore();
    - * errorProxy.checkAndThrowException();
    - *
    - * try {
    - *     subThread.join();
    - * } catch (InterruptedException e) {
    - *     errorProxy.checkAndThrowException();
    - *     // restore interrupted status, if not caused by an exception
    - *     Thread.currentThread().interrupt();
    - * }
    - * }
    - */ -@Internal -@Deprecated -public class ExceptionProxy { - - /** The thread that should be interrupted when an exception occurs. */ - private final Thread toInterrupt; - - /** The exception to throw. */ - private final AtomicReference exception; - - /** - * Creates an exception proxy that interrupts the given thread upon report of an exception. The - * thread to interrupt may be null. - * - * @param toInterrupt The thread to interrupt upon an exception. May be null. - */ - public ExceptionProxy(@Nullable Thread toInterrupt) { - this.toInterrupt = toInterrupt; - this.exception = new AtomicReference<>(); - } - - // ------------------------------------------------------------------------ - - /** - * Sets the exception and interrupts the target thread, if no other exception has occurred so - * far. - * - *

    The exception is only set (and the interruption is only triggered), if no other exception - * was set before. - * - * @param t The exception that occurred - */ - public void reportError(Throwable t) { - // set the exception, if it is the first (and the exception is non null) - if (t != null && exception.compareAndSet(null, t) && toInterrupt != null) { - toInterrupt.interrupt(); - } - } - - /** - * Checks whether an exception has been set via {@link #reportError(Throwable)}. If yes, that - * exception if re-thrown by this method. - * - * @throws Exception This method re-throws the exception, if set. - */ - public void checkAndThrowException() throws Exception { - Throwable t = exception.get(); - if (t != null) { - if (t instanceof Exception) { - throw (Exception) t; - } else if (t instanceof Error) { - throw (Error) t; - } else { - throw new Exception(t); - } - } - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/FlinkKafkaInternalProducer.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/FlinkKafkaInternalProducer.java deleted file mode 100644 index 6e618cbe0..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/FlinkKafkaInternalProducer.java +++ /dev/null @@ -1,423 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.PublicEvolving; -import org.apache.flink.annotation.VisibleForTesting; -import org.apache.flink.util.Preconditions; - -import org.apache.kafka.clients.consumer.ConsumerGroupMetadata; -import org.apache.kafka.clients.consumer.OffsetAndMetadata; -import org.apache.kafka.clients.producer.Callback; -import org.apache.kafka.clients.producer.KafkaProducer; -import org.apache.kafka.clients.producer.Producer; -import org.apache.kafka.clients.producer.ProducerConfig; -import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.kafka.clients.producer.RecordMetadata; -import org.apache.kafka.clients.producer.internals.TransactionManager; -import org.apache.kafka.clients.producer.internals.TransactionalRequestResult; -import org.apache.kafka.common.Metric; -import org.apache.kafka.common.MetricName; -import org.apache.kafka.common.Node; -import org.apache.kafka.common.PartitionInfo; -import org.apache.kafka.common.TopicPartition; -import org.apache.kafka.common.errors.ProducerFencedException; -import org.apache.kafka.common.requests.FindCoordinatorRequest; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.annotation.Nullable; - -import java.lang.reflect.Constructor; -import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.time.Duration; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.concurrent.Future; -import java.util.stream.Collectors; - -/** Internal flink kafka producer. */ -@PublicEvolving -@Deprecated -public class FlinkKafkaInternalProducer implements Producer { - private static final Logger LOG = LoggerFactory.getLogger(FlinkKafkaInternalProducer.class); - - protected final KafkaProducer kafkaProducer; - - // This lock and closed flag are introduced to workaround KAFKA-6635. Because the bug is only - // fixed in - // Kafka 2.3.0, we need this workaround before Kafka dependency is bumped to 2.3.0 to avoid - // deadlock - // between a transaction committing / aborting thread and a producer closing thread. 
- // TODO: remove the workaround after Kafka dependency is bumped to 2.3.0+ - private final Object producerClosingLock; - private volatile boolean closed; - - @Nullable protected final String transactionalId; - - public FlinkKafkaInternalProducer(Properties properties) { - transactionalId = properties.getProperty(ProducerConfig.TRANSACTIONAL_ID_CONFIG); - kafkaProducer = new KafkaProducer<>(properties); - producerClosingLock = new Object(); - closed = false; - } - - // -------------------------------- Simple proxy method calls -------------------------------- - - @Override - public void initTransactions() { - synchronized (producerClosingLock) { - ensureNotClosed(); - kafkaProducer.initTransactions(); - } - } - - @Override - public void beginTransaction() throws ProducerFencedException { - synchronized (producerClosingLock) { - ensureNotClosed(); - kafkaProducer.beginTransaction(); - } - } - - @Override - public void commitTransaction() throws ProducerFencedException { - synchronized (producerClosingLock) { - ensureNotClosed(); - kafkaProducer.commitTransaction(); - } - } - - @Override - public void abortTransaction() throws ProducerFencedException { - synchronized (producerClosingLock) { - ensureNotClosed(); - kafkaProducer.abortTransaction(); - } - } - - @Override - public void sendOffsetsToTransaction( - Map offsets, String consumerGroupId) - throws ProducerFencedException { - synchronized (producerClosingLock) { - ensureNotClosed(); - kafkaProducer.sendOffsetsToTransaction(offsets, consumerGroupId); - } - } - - @Override - public void sendOffsetsToTransaction( - Map map, ConsumerGroupMetadata consumerGroupMetadata) - throws ProducerFencedException { - kafkaProducer.sendOffsetsToTransaction(map, consumerGroupMetadata); - } - - @Override - public Future send(ProducerRecord record) { - return kafkaProducer.send(record); - } - - @Override - public Future send(ProducerRecord record, Callback callback) { - return kafkaProducer.send(record, callback); - } - - @Override - public List partitionsFor(String topic) { - synchronized (producerClosingLock) { - ensureNotClosed(); - return kafkaProducer.partitionsFor(topic); - } - } - - @Override - public Map metrics() { - return kafkaProducer.metrics(); - } - - @Override - public void close() { - throw new UnsupportedOperationException( - "Close without timeout is now allowed because it can leave lingering Kafka threads."); - } - - @Override - public void close(Duration duration) { - synchronized (producerClosingLock) { - kafkaProducer.close(duration); - if (LOG.isDebugEnabled()) { - LOG.debug( - "Closed internal KafkaProducer {}. Stacktrace: {}", - System.identityHashCode(this), - Arrays.stream(Thread.currentThread().getStackTrace()) - .map(StackTraceElement::toString) - .collect(Collectors.joining("\n"))); - } - closed = true; - } - } - - // -------------------------------- New methods or methods with changed behaviour - // -------------------------------- - - @Override - public void flush() { - kafkaProducer.flush(); - if (transactionalId != null) { - synchronized (producerClosingLock) { - ensureNotClosed(); - flushNewPartitions(); - } - } - } - - /** - * Instead of obtaining producerId and epoch from the transaction coordinator, re-use previously - * obtained ones, so that we can resume transaction after a restart. Implementation of this - * method is based on {@link KafkaProducer#initTransactions}. 
- * https://github.com/apache/kafka/commit/5d2422258cb975a137a42a4e08f03573c49a387e#diff-f4ef1afd8792cd2a2e9069cd7ddea630 - */ - public void resumeTransaction(long producerId, short epoch) { - synchronized (producerClosingLock) { - ensureNotClosed(); - Preconditions.checkState( - producerId >= 0 && epoch >= 0, - "Incorrect values for producerId %s and epoch %s", - producerId, - epoch); - LOG.info( - "Attempting to resume transaction {} with producerId {} and epoch {}", - transactionalId, - producerId, - epoch); - - Object transactionManager = getField(kafkaProducer, "transactionManager"); - synchronized (transactionManager) { - Object txnPartitionMap = getField(transactionManager, "txnPartitionMap"); - - invoke( - transactionManager, - "transitionTo", - getEnum( - "org.apache.kafka.clients.producer.internals.TransactionManager$State.INITIALIZING")); - invoke(txnPartitionMap, "reset"); - - setField( - transactionManager, - "producerIdAndEpoch", - createProducerIdAndEpoch(producerId, epoch)); - - invoke( - transactionManager, - "transitionTo", - getEnum( - "org.apache.kafka.clients.producer.internals.TransactionManager$State.READY")); - - invoke( - transactionManager, - "transitionTo", - getEnum( - "org.apache.kafka.clients.producer.internals.TransactionManager$State.IN_TRANSACTION")); - setField(transactionManager, "transactionStarted", true); - } - } - } - - public String getTransactionalId() { - return transactionalId; - } - - public long getProducerId() { - Object transactionManager = getField(kafkaProducer, "transactionManager"); - Object producerIdAndEpoch = getField(transactionManager, "producerIdAndEpoch"); - return (long) getField(producerIdAndEpoch, "producerId"); - } - - public short getEpoch() { - Object transactionManager = getField(kafkaProducer, "transactionManager"); - Object producerIdAndEpoch = getField(transactionManager, "producerIdAndEpoch"); - return (short) getField(producerIdAndEpoch, "epoch"); - } - - @VisibleForTesting - public int getTransactionCoordinatorId() { - Object transactionManager = getField(kafkaProducer, "transactionManager"); - Node node = - (Node) - invoke( - transactionManager, - "coordinator", - FindCoordinatorRequest.CoordinatorType.TRANSACTION); - return node.id(); - } - - private void ensureNotClosed() { - if (closed) { - throw new IllegalStateException( - String.format( - "The producer %s has already been closed", - System.identityHashCode(this))); - } - } - - private Object createProducerIdAndEpoch(long producerId, short epoch) { - try { - Field field = TransactionManager.class.getDeclaredField("producerIdAndEpoch"); - Class clazz = field.getType(); - Constructor constructor = clazz.getDeclaredConstructor(Long.TYPE, Short.TYPE); - constructor.setAccessible(true); - return constructor.newInstance(producerId, epoch); - } catch (InvocationTargetException - | InstantiationException - | IllegalAccessException - | NoSuchFieldException - | NoSuchMethodException e) { - throw new RuntimeException("Incompatible KafkaProducer version", e); - } - } - - /** - * Besides committing {@link org.apache.kafka.clients.producer.KafkaProducer#commitTransaction} - * is also adding new partitions to the transaction. flushNewPartitions method is moving this - * logic to pre-commit/flush, to make resumeTransaction simpler. Otherwise resumeTransaction - * would require to restore state of the not yet added/"in-flight" partitions. 
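For illustration only, not part of the patch: a rough sketch of how the resume hooks above were meant to be used around a restart, remember producerId and epoch while a transaction is open, then re-attach to it from a fresh producer with the same transactional id and commit. It assumes the removed FlinkKafkaInternalProducer class (generic in key and value type) is still on the classpath; broker address, topic, transactional id and timeouts are placeholders, and closing the first producer merely stands in for a failure.

import java.time.Duration;
import java.util.Properties;

import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;

public class ResumeTransactionSketch {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, "demo-transactional-id");
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.ByteArraySerializer");
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.ByteArraySerializer");

        // "Pre-snapshot" phase: open a transaction, write into it and remember the
        // (producerId, epoch) pair that identifies it on the broker side.
        FlinkKafkaInternalProducer<byte[], byte[]> producer =
                new FlinkKafkaInternalProducer<>(props);
        producer.initTransactions();
        producer.beginTransaction();
        producer.send(new ProducerRecord<>("demo-topic", "pre-failure".getBytes()));
        producer.flush();
        long producerId = producer.getProducerId();
        short epoch = producer.getEpoch();
        // Closing here merely stands in for the original producer going away in a failure.
        producer.close(Duration.ofSeconds(5));

        // "Recovery" phase: a fresh producer with the same transactional id re-attaches to
        // the pending transaction from the stored identifiers and commits it.
        FlinkKafkaInternalProducer<byte[], byte[]> recovered =
                new FlinkKafkaInternalProducer<>(props);
        recovered.resumeTransaction(producerId, epoch);
        recovered.commitTransaction();
        recovered.close(Duration.ofSeconds(5));
    }
}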
- */ - private void flushNewPartitions() { - LOG.info("Flushing new partitions"); - TransactionalRequestResult result = enqueueNewPartitions(); - Object sender = getField(kafkaProducer, "sender"); - invoke(sender, "wakeup"); - result.await(); - } - - /** - * Enqueues new transactions at the transaction manager and returns a {@link - * TransactionalRequestResult} that allows waiting on them. - * - *

    If there are no new transactions we return a {@link TransactionalRequestResult} that is - * already done. - */ - private TransactionalRequestResult enqueueNewPartitions() { - Object transactionManager = getField(kafkaProducer, "transactionManager"); - synchronized (transactionManager) { - Object newPartitionsInTransaction = - getField(transactionManager, "newPartitionsInTransaction"); - Object newPartitionsInTransactionIsEmpty = - invoke(newPartitionsInTransaction, "isEmpty"); - TransactionalRequestResult result; - if (newPartitionsInTransactionIsEmpty instanceof Boolean - && !((Boolean) newPartitionsInTransactionIsEmpty)) { - Object txnRequestHandler = - invoke(transactionManager, "addPartitionsToTransactionHandler"); - invoke( - transactionManager, - "enqueueRequest", - new Class[] {txnRequestHandler.getClass().getSuperclass()}, - new Object[] {txnRequestHandler}); - result = - (TransactionalRequestResult) - getField( - txnRequestHandler, - txnRequestHandler.getClass().getSuperclass(), - "result"); - } else { - // we don't have an operation but this operation string is also used in - // addPartitionsToTransactionHandler. - result = new TransactionalRequestResult("AddPartitionsToTxn"); - result.done(); - } - return result; - } - } - - protected static Enum getEnum(String enumFullName) { - String[] x = enumFullName.split("\\.(?=[^\\.]+$)"); - if (x.length == 2) { - String enumClassName = x[0]; - String enumName = x[1]; - try { - Class cl = (Class) Class.forName(enumClassName); - return Enum.valueOf(cl, enumName); - } catch (ClassNotFoundException e) { - throw new RuntimeException("Incompatible KafkaProducer version", e); - } - } - return null; - } - - protected static Object invoke(Object object, String methodName, Object... args) { - Class[] argTypes = new Class[args.length]; - for (int i = 0; i < args.length; i++) { - argTypes[i] = args[i].getClass(); - } - return invoke(object, methodName, argTypes, args); - } - - private static Object invoke( - Object object, String methodName, Class[] argTypes, Object[] args) { - try { - Method method = object.getClass().getDeclaredMethod(methodName, argTypes); - method.setAccessible(true); - return method.invoke(object, args); - } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) { - throw new RuntimeException("Incompatible KafkaProducer version", e); - } - } - - /** - * Gets and returns the field {@code fieldName} from the given Object {@code object} using - * reflection. - */ - protected static Object getField(Object object, String fieldName) { - return getField(object, object.getClass(), fieldName); - } - - /** - * Gets and returns the field {@code fieldName} from the given Object {@code object} using - * reflection. - */ - private static Object getField(Object object, Class clazz, String fieldName) { - try { - Field field = clazz.getDeclaredField(fieldName); - field.setAccessible(true); - return field.get(object); - } catch (NoSuchFieldException | IllegalAccessException e) { - throw new RuntimeException("Incompatible KafkaProducer version", e); - } - } - - /** - * Sets the field {@code fieldName} on the given Object {@code object} to {@code value} using - * reflection. 
- */ - protected static void setField(Object object, String fieldName, Object value) { - try { - Field field = object.getClass().getDeclaredField(fieldName); - field.setAccessible(true); - field.set(object, value); - } catch (NoSuchFieldException | IllegalAccessException e) { - throw new RuntimeException("Incompatible KafkaProducer version", e); - } - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/Handover.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/Handover.java deleted file mode 100644 index 64132b0b9..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/Handover.java +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.util.ExceptionUtils; - -import org.apache.kafka.clients.consumer.ConsumerRecords; - -import javax.annotation.Nonnull; -import javax.annotation.concurrent.ThreadSafe; - -import java.io.Closeable; - -import static org.apache.flink.util.Preconditions.checkNotNull; - -/** - * The Handover is a utility to hand over data (a buffer of records) and exception from a - * producer thread to a consumer thread. It effectively behaves like a "size one - * blocking queue", with some extras around exception reporting, closing, and waking up thread - * without {@link Thread#interrupt() interrupting} threads. - * - *

    This class is used in the Flink Kafka Consumer to hand over data and exceptions between the - * thread that runs the KafkaConsumer class and the main thread. - * - *

    The Handover has the notion of "waking up" the producer thread with a {@link WakeupException} - * rather than a thread interrupt. - * - *
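For illustration only, not part of the patch: a minimal sketch of the hand-over pattern between a fetch thread and a consuming thread, assuming the removed Handover class is still on the classpath; empty ConsumerRecords batches stand in for real KafkaConsumer polls.

import org.apache.kafka.clients.consumer.ConsumerRecords;

public class HandoverSketch {

    public static void main(String[] args) throws Exception {
        final Handover handover = new Handover();

        Thread fetchThread = new Thread(() -> {
            try {
                for (int i = 0; i < 10; i++) {
                    // Blocks until the previous batch was picked up ("size one" queue).
                    handover.produce(ConsumerRecords.empty());
                }
                handover.close();
            } catch (Handover.ClosedException e) {
                // the consuming side shut the handover down; just exit
            } catch (Throwable t) {
                // any other failure is forwarded to the consuming thread
                handover.reportError(t);
            }
        });
        fetchThread.start();

        try {
            while (true) {
                // Rethrows an error reported by the fetch thread, or throws
                // Handover.ClosedException once the handover has been closed.
                ConsumerRecords<byte[], byte[]> batch = handover.pollNext();
                System.out.println("received a batch with " + batch.count() + " records");
            }
        } catch (Handover.ClosedException e) {
            System.out.println("handover closed, shutting down");
        }

        fetchThread.join();
    }
}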

The Handover can also be "closed", signalling from one thread to the other that the thread - * has terminated. - */ -@ThreadSafe -@Internal -@Deprecated -public final class Handover implements Closeable { - - private final Object lock = new Object(); - - private ConsumerRecords<byte[], byte[]> next; - private Throwable error; - private boolean wakeupProducer; - - /** - * Polls the next element from the Handover, possibly blocking until the next element is - * available. This method behaves similarly to polling from a blocking queue. - * - *

    If an exception was handed in by the producer ({@link #reportError(Throwable)}), then that - * exception is thrown rather than an element being returned. - * - * @return The next element (buffer of records, never null). - * @throws ClosedException Thrown if the Handover was {@link #close() closed}. - * @throws Exception Rethrows exceptions from the {@link #reportError(Throwable)} method. - */ - @Nonnull - public ConsumerRecords pollNext() throws Exception { - synchronized (lock) { - while (next == null && error == null) { - lock.wait(); - } - - ConsumerRecords n = next; - if (n != null) { - next = null; - lock.notifyAll(); - return n; - } else { - ExceptionUtils.rethrowException(error, error.getMessage()); - - // this statement cannot be reached since the above method always throws an - // exception - // this is only here to silence the compiler and any warnings - return ConsumerRecords.empty(); - } - } - } - - /** - * Hands over an element from the producer. If the Handover already has an element that was not - * yet picked up by the consumer thread, this call blocks until the consumer picks up that - * previous element. - * - *

    This behavior is similar to a "size one" blocking queue. - * - * @param element The next element to hand over. - * @throws InterruptedException Thrown, if the thread is interrupted while blocking for the - * Handover to be empty. - * @throws WakeupException Thrown, if the {@link #wakeupProducer()} method is called while - * blocking for the Handover to be empty. - * @throws ClosedException Thrown if the Handover was closed or concurrently being closed. - */ - public void produce(final ConsumerRecords element) - throws InterruptedException, WakeupException, ClosedException { - - checkNotNull(element); - - synchronized (lock) { - while (next != null && !wakeupProducer) { - lock.wait(); - } - - wakeupProducer = false; - - // if there is still an element, we must have been woken up - if (next != null) { - throw new WakeupException(); - } - // if there is no error, then this is open and can accept this element - else if (error == null) { - next = element; - lock.notifyAll(); - } - // an error marks this as closed for the producer - else { - throw new ClosedException(); - } - } - } - - /** - * Reports an exception. The consumer will throw the given exception immediately, if it is - * currently blocked in the {@link #pollNext()} method, or the next time it calls that method. - * - *

    After this method has been called, no call to either {@link #produce(ConsumerRecords)} or - * {@link #pollNext()} will ever return regularly any more, but will always return - * exceptionally. - * - *

    If another exception was already reported, this method does nothing. - * - *

    For the producer, the Handover will appear as if it was {@link #close() closed}. - * - * @param t The exception to report. - */ - public void reportError(Throwable t) { - checkNotNull(t); - - synchronized (lock) { - // do not override the initial exception - if (error == null) { - error = t; - } - next = null; - lock.notifyAll(); - } - } - - /** - * Closes the handover. Both the {@link #produce(ConsumerRecords)} method and the {@link - * #pollNext()} will throw a {@link ClosedException} on any currently blocking and future - * invocations. - * - *

    If an exception was previously reported via the {@link #reportError(Throwable)} method, - * that exception will not be overridden. The consumer thread will throw that exception upon - * calling {@link #pollNext()}, rather than the {@code ClosedException}. - */ - @Override - public void close() { - synchronized (lock) { - next = null; - wakeupProducer = false; - - if (error == null) { - error = new ClosedException(); - } - lock.notifyAll(); - } - } - - /** - * Wakes the producer thread up. If the producer thread is currently blocked in the {@link - * #produce(ConsumerRecords)} method, it will exit the method throwing a {@link - * WakeupException}. - */ - public void wakeupProducer() { - synchronized (lock) { - wakeupProducer = true; - lock.notifyAll(); - } - } - - // ------------------------------------------------------------------------ - - /** - * An exception thrown by the Handover in the {@link #pollNext()} or {@link - * #produce(ConsumerRecords)} method, after the Handover was closed via {@link #close()}. - */ - public static final class ClosedException extends Exception { - private static final long serialVersionUID = 1L; - } - - /** - * A special exception thrown bv the Handover in the {@link #produce(ConsumerRecords)} method - * when the producer is woken up from a blocking call via {@link #wakeupProducer()}. - */ - public static final class WakeupException extends Exception { - private static final long serialVersionUID = 1L; - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaCommitCallback.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaCommitCallback.java deleted file mode 100644 index f1180b8b5..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaCommitCallback.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; - -/** - * A callback interface that the source operator can implement to trigger custom actions when a - * commit request completes, which should normally be triggered from checkpoint complete event. - */ -@Internal -@Deprecated -public interface KafkaCommitCallback { - - /** - * A callback method the user can implement to provide asynchronous handling of commit request - * completion. This method will be called when the commit request sent to the server has been - * acknowledged without error. - */ - void onSuccess(); - - /** - * A callback method the user can implement to provide asynchronous handling of commit request - * failure. This method will be called when the commit request failed. 
- * - * @param cause Kafka commit failure cause returned by kafka client - */ - void onException(Throwable cause); -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaConsumerThread.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaConsumerThread.java deleted file mode 100644 index 5b6fb4d43..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaConsumerThread.java +++ /dev/null @@ -1,565 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.annotation.VisibleForTesting; -import org.apache.flink.api.java.tuple.Tuple2; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricWrapper; - -import org.apache.kafka.clients.consumer.ConsumerRecords; -import org.apache.kafka.clients.consumer.KafkaConsumer; -import org.apache.kafka.clients.consumer.OffsetAndMetadata; -import org.apache.kafka.clients.consumer.OffsetCommitCallback; -import org.apache.kafka.common.Metric; -import org.apache.kafka.common.MetricName; -import org.apache.kafka.common.TopicPartition; -import org.apache.kafka.common.errors.WakeupException; -import org.slf4j.Logger; - -import javax.annotation.Nonnull; - -import java.time.Duration; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.concurrent.atomic.AtomicReference; - -import static org.apache.flink.util.Preconditions.checkNotNull; - -/** - * The thread the runs the {@link KafkaConsumer}, connecting to the brokers and polling records. The - * thread pushes the data into a {@link Handover} to be picked up by the fetcher that will - * deserialize and emit the records. - * - *

    IMPORTANT: This thread must not be interrupted when attempting to shut it down. The - * Kafka consumer code was found to not always handle interrupts well, and to even deadlock in - * certain situations. - * - *
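For illustration only, not part of the patch: a minimal sketch of the interrupt-free shutdown described above, using only the plain Kafka client, where the blocking poll is unblocked with KafkaConsumer.wakeup() rather than Thread.interrupt(). Broker address, group id and topic are placeholders.

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.errors.WakeupException;

public class WakeupShutdownSketch {

    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "demo-group");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.ByteArrayDeserializer");
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.ByteArrayDeserializer");

        final KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props);

        Thread pollThread = new Thread(() -> {
            try {
                consumer.subscribe(Collections.singletonList("demo-topic"));
                while (true) {
                    // Blocks for up to 100 ms; a wakeup() call makes it throw WakeupException.
                    consumer.poll(Duration.ofMillis(100));
                }
            } catch (WakeupException expected) {
                // the regular shutdown path: leave the loop without having been interrupted
            } finally {
                consumer.close();
            }
        });
        pollThread.start();

        Thread.sleep(1_000);
        // wakeup() is the only KafkaConsumer method that is safe to call from another thread.
        consumer.wakeup();
        pollThread.join();
    }
}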

    Implementation Note: This code is written to be reusable in later versions of the - * KafkaConsumer. Because Kafka is not maintaining binary compatibility, we use a "call bridge" as - * an indirection to the KafkaConsumer calls that change signature. - */ -@Internal -@Deprecated -public class KafkaConsumerThread extends Thread { - - /** Logger for this consumer. */ - private final Logger log; - - /** The handover of data and exceptions between the consumer thread and the task thread. */ - private final Handover handover; - - /** The next offsets that the main thread should commit and the commit callback. */ - private final AtomicReference< - Tuple2, KafkaCommitCallback>> - nextOffsetsToCommit; - - /** The configuration for the Kafka consumer. */ - private final Properties kafkaProperties; - - /** The queue of unassigned partitions that we need to assign to the Kafka consumer. */ - private final ClosableBlockingQueue> - unassignedPartitionsQueue; - - /** The maximum number of milliseconds to wait for a fetch batch. */ - private final long pollTimeout; - - /** Flag whether to add Kafka's metrics to the Flink metrics. */ - private final boolean useMetrics; - - /** - * @deprecated We should only be publishing to the {{@link #consumerMetricGroup}}. This is kept - * to retain compatibility for metrics. - */ - @Deprecated private final MetricGroup subtaskMetricGroup; - - /** We get this from the outside to publish metrics. */ - private final MetricGroup consumerMetricGroup; - - /** Reference to the Kafka consumer, once it is created. */ - private volatile KafkaConsumer consumer; - - /** This lock is used to isolate the consumer for partition reassignment. */ - private final Object consumerReassignmentLock; - - /** Indication if this consumer has any assigned partition. */ - private boolean hasAssignedPartitions; - - /** - * Flag to indicate whether an external operation ({@link #setOffsetsToCommit(Map, - * KafkaCommitCallback)} or {@link #shutdown()}) had attempted to wakeup the consumer while it - * was isolated for partition reassignment. - */ - private volatile boolean hasBufferedWakeup; - - /** Flag to mark the main work loop as alive. */ - private volatile boolean running; - - /** Flag tracking whether the latest commit request has completed. 
*/ - private volatile boolean commitInProgress; - - public KafkaConsumerThread( - Logger log, - Handover handover, - Properties kafkaProperties, - ClosableBlockingQueue> - unassignedPartitionsQueue, - String threadName, - long pollTimeout, - boolean useMetrics, - MetricGroup consumerMetricGroup, - MetricGroup subtaskMetricGroup) { - - super(threadName); - setDaemon(true); - - this.log = checkNotNull(log); - this.handover = checkNotNull(handover); - this.kafkaProperties = checkNotNull(kafkaProperties); - this.consumerMetricGroup = checkNotNull(consumerMetricGroup); - this.subtaskMetricGroup = checkNotNull(subtaskMetricGroup); - - this.unassignedPartitionsQueue = checkNotNull(unassignedPartitionsQueue); - - this.pollTimeout = pollTimeout; - this.useMetrics = useMetrics; - - this.consumerReassignmentLock = new Object(); - this.nextOffsetsToCommit = new AtomicReference<>(); - this.running = true; - } - - // ------------------------------------------------------------------------ - - @Override - public void run() { - // early exit check - if (!running) { - return; - } - - // this is the means to talk to FlinkKafkaConsumer's main thread - final Handover handover = this.handover; - - // This method initializes the KafkaConsumer and guarantees it is torn down properly. - // This is important, because the consumer has multi-threading issues, - // including concurrent 'close()' calls. - try { - this.consumer = getConsumer(kafkaProperties); - } catch (Throwable t) { - handover.reportError(t); - return; - } - - // from here on, the consumer is guaranteed to be closed properly - try { - // register Kafka's very own metrics in Flink's metric reporters - if (useMetrics) { - // register Kafka metrics to Flink - Map metrics = consumer.metrics(); - if (metrics == null) { - // MapR's Kafka implementation returns null here. - log.info("Consumer implementation does not support metrics"); - } else { - // we have Kafka metrics, register them - for (Map.Entry metric : metrics.entrySet()) { - consumerMetricGroup.gauge( - metric.getKey().name(), new KafkaMetricWrapper(metric.getValue())); - - // TODO this metric is kept for compatibility purposes; should remove in the - // future - subtaskMetricGroup.gauge( - metric.getKey().name(), new KafkaMetricWrapper(metric.getValue())); - } - } - } - - // early exit check - if (!running) { - return; - } - - // the latest bulk of records. May carry across the loop if the thread is woken up - // from blocking on the handover - ConsumerRecords records = null; - - // reused variable to hold found unassigned new partitions. - // found partitions are not carried across loops using this variable; - // they are carried across via re-adding them to the unassigned partitions queue - List> newPartitions; - - // main fetch loop - while (running) { - - // check if there is something to commit - if (!commitInProgress) { - // get and reset the work-to-be committed, so we don't repeatedly commit the - // same - final Tuple2, KafkaCommitCallback> - commitOffsetsAndCallback = nextOffsetsToCommit.getAndSet(null); - - if (commitOffsetsAndCallback != null) { - log.debug("Sending async offset commit request to Kafka broker"); - - // also record that a commit is already in progress - // the order here matters! first set the flag, then send the commit command. 
- commitInProgress = true; - retryOnceOnWakeup( - () -> - consumer.commitAsync( - commitOffsetsAndCallback.f0, - new CommitCallback(commitOffsetsAndCallback.f1)), - "commitAsync"); - } - } - - try { - if (hasAssignedPartitions) { - newPartitions = unassignedPartitionsQueue.pollBatch(); - } else { - // if no assigned partitions block until we get at least one - // instead of hot spinning this loop. We rely on a fact that - // unassignedPartitionsQueue will be closed on a shutdown, so - // we don't block indefinitely - newPartitions = unassignedPartitionsQueue.getBatchBlocking(); - } - if (newPartitions != null) { - reassignPartitions(newPartitions); - } - } catch (AbortedReassignmentException e) { - continue; - } - - if (!hasAssignedPartitions) { - // Without assigned partitions KafkaConsumer.poll will throw an exception - continue; - } - - // get the next batch of records, unless we did not manage to hand the old batch - // over - if (records == null) { - try { - records = consumer.poll(Duration.ofMillis(pollTimeout)); - } catch (WakeupException we) { - continue; - } - } - - try { - handover.produce(records); - records = null; - } catch (Handover.WakeupException e) { - // fall through the loop - } - } - // end main fetch loop - } catch (Throwable t) { - // let the main thread know and exit - // it may be that this exception comes because the main thread closed the handover, in - // which case the below reporting is irrelevant, but does not hurt either - handover.reportError(t); - } finally { - // make sure the handover is closed if it is not already closed or has an error - handover.close(); - - // make sure the KafkaConsumer is closed - try { - consumer.close(); - } catch (Throwable t) { - log.warn("Error while closing Kafka consumer", t); - } - } - } - - /** - * Shuts this thread down, waking up the thread gracefully if blocked (without - * Thread.interrupt() calls). - */ - public void shutdown() { - running = false; - - // wake up all blocking calls on the queue - unassignedPartitionsQueue.close(); - - // We cannot call close() on the KafkaConsumer, because it will actually throw - // an exception if a concurrent call is in progress - - // this wakes up the consumer if it is blocked handing over records - handover.wakeupProducer(); - - // this wakes up the consumer if it is blocked in a kafka poll - synchronized (consumerReassignmentLock) { - if (consumer != null) { - consumer.wakeup(); - } else { - // the consumer is currently isolated for partition reassignment; - // set this flag so that the wakeup state is restored once the reassignment is - // complete - hasBufferedWakeup = true; - } - } - } - - /** - * Tells this thread to commit a set of offsets. This method does not block, the committing - * operation will happen asynchronously. - * - *

    Only one commit operation may be pending at any time. If the committing takes longer than - * the frequency with which this method is called, then some commits may be skipped due to being - * superseded by newer ones. - * - * @param offsetsToCommit The offsets to commit - * @param commitCallback callback when Kafka commit completes - */ - void setOffsetsToCommit( - Map offsetsToCommit, - @Nonnull KafkaCommitCallback commitCallback) { - - // record the work to be committed by the main consumer thread and make sure the consumer - // notices that - if (nextOffsetsToCommit.getAndSet(Tuple2.of(offsetsToCommit, commitCallback)) != null) { - log.warn( - "Committing offsets to Kafka takes longer than the checkpoint interval. " - + "Skipping commit of previous offsets because newer complete checkpoint offsets are available. " - + "This does not compromise Flink's checkpoint integrity."); - } - - // if the consumer is blocked in a poll() or handover operation, wake it up to commit soon - handover.wakeupProducer(); - - synchronized (consumerReassignmentLock) { - if (consumer != null) { - consumer.wakeup(); - } else { - // the consumer is currently isolated for partition reassignment; - // set this flag so that the wakeup state is restored once the reassignment is - // complete - hasBufferedWakeup = true; - } - } - } - - // ------------------------------------------------------------------------ - - /** - * Reestablishes the assigned partitions for the consumer. The reassigned partitions consists of - * the provided new partitions and whatever partitions was already previously assigned to the - * consumer. - * - *

<p>The reassignment process is protected against wakeup calls, so that after this method - * returns, the consumer is either untouched or completely reassigned with the correct offset - * positions. - * - *

<p>If the consumer was woken up before it was isolated for the reassignment, so that the - * resulting interruption hits at any point during the reassignment, the consumer is guaranteed - * to roll back as if it had been left untouched. If, on the other hand, a wakeup is attempted - * while the consumer is isolated for the reassignment, the wakeup call is "buffered" and - * re-applied once the reassignment completes. - * - *

    This method is exposed for testing purposes. - */ - @VisibleForTesting - void reassignPartitions(List> newPartitions) - throws Exception { - if (newPartitions.size() == 0) { - return; - } - hasAssignedPartitions = true; - boolean reassignmentStarted = false; - - // since the reassignment may introduce several Kafka blocking calls that cannot be - // interrupted, - // the consumer needs to be isolated from external wakeup calls in setOffsetsToCommit() and - // shutdown() - // until the reassignment is complete. - final KafkaConsumer consumerTmp; - synchronized (consumerReassignmentLock) { - consumerTmp = this.consumer; - this.consumer = null; - } - - final Map oldPartitionAssignmentsToPosition = new HashMap<>(); - try { - for (TopicPartition oldPartition : consumerTmp.assignment()) { - oldPartitionAssignmentsToPosition.put( - oldPartition, consumerTmp.position(oldPartition)); - } - - final List newPartitionAssignments = - new ArrayList<>( - newPartitions.size() + oldPartitionAssignmentsToPosition.size()); - newPartitionAssignments.addAll(oldPartitionAssignmentsToPosition.keySet()); - newPartitionAssignments.addAll(convertKafkaPartitions(newPartitions)); - - // reassign with the new partitions - consumerTmp.assign(newPartitionAssignments); - reassignmentStarted = true; - - // old partitions should be seeked to their previous position - for (Map.Entry oldPartitionToPosition : - oldPartitionAssignmentsToPosition.entrySet()) { - consumerTmp.seek( - oldPartitionToPosition.getKey(), oldPartitionToPosition.getValue()); - } - - // offsets in the state of new partitions may still be placeholder sentinel values if we - // are: - // (1) starting fresh, - // (2) checkpoint / savepoint state we were restored with had not completely - // been replaced with actual offset values yet, or - // (3) the partition was newly discovered after startup; - // replace those with actual offsets, according to what the sentinel value represent. - for (KafkaTopicPartitionState newPartitionState : newPartitions) { - if (newPartitionState.getOffset() - == KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET) { - consumerTmp.seekToBeginning( - Collections.singletonList(newPartitionState.getKafkaPartitionHandle())); - newPartitionState.setOffset( - consumerTmp.position(newPartitionState.getKafkaPartitionHandle()) - 1); - } else if (newPartitionState.getOffset() - == KafkaTopicPartitionStateSentinel.LATEST_OFFSET) { - consumerTmp.seekToEnd( - Collections.singletonList(newPartitionState.getKafkaPartitionHandle())); - newPartitionState.setOffset( - consumerTmp.position(newPartitionState.getKafkaPartitionHandle()) - 1); - } else if (newPartitionState.getOffset() - == KafkaTopicPartitionStateSentinel.GROUP_OFFSET) { - // the KafkaConsumer by default will automatically seek the consumer position - // to the committed group offset, so we do not need to do it. - - newPartitionState.setOffset( - consumerTmp.position(newPartitionState.getKafkaPartitionHandle()) - 1); - } else { - consumerTmp.seek( - newPartitionState.getKafkaPartitionHandle(), - newPartitionState.getOffset() + 1); - } - } - } catch (WakeupException e) { - // a WakeupException may be thrown if the consumer was invoked wakeup() - // before it was isolated for the reassignment. In this case, we abort the - // reassignment and just re-expose the original consumer. 
- - synchronized (consumerReassignmentLock) { - this.consumer = consumerTmp; - - // if reassignment had already started and affected the consumer, - // we do a full roll back so that it is as if it was left untouched - if (reassignmentStarted) { - this.consumer.assign( - new ArrayList<>(oldPartitionAssignmentsToPosition.keySet())); - - for (Map.Entry oldPartitionToPosition : - oldPartitionAssignmentsToPosition.entrySet()) { - this.consumer.seek( - oldPartitionToPosition.getKey(), oldPartitionToPosition.getValue()); - } - } - - // no need to restore the wakeup state in this case, - // since only the last wakeup call is effective anyways - hasBufferedWakeup = false; - - // re-add all new partitions back to the unassigned partitions queue to be picked up - // again - for (KafkaTopicPartitionState newPartition : newPartitions) { - unassignedPartitionsQueue.add(newPartition); - } - - // this signals the main fetch loop to continue through the loop - throw new AbortedReassignmentException(); - } - } - - // reassignment complete; expose the reassigned consumer - synchronized (consumerReassignmentLock) { - this.consumer = consumerTmp; - - // restore wakeup state for the consumer if necessary - if (hasBufferedWakeup) { - this.consumer.wakeup(); - hasBufferedWakeup = false; - } - } - } - - @VisibleForTesting - KafkaConsumer getConsumer(Properties kafkaProperties) { - return new KafkaConsumer<>(kafkaProperties); - } - - private void retryOnceOnWakeup(Runnable consumerCall, String description) { - try { - consumerCall.run(); - } catch (WakeupException we) { - log.info( - "Caught WakeupException while executing Kafka consumer call for {}. Will retry it once.", - description); - consumerCall.run(); - } - } - - // ------------------------------------------------------------------------ - // Utilities - // ------------------------------------------------------------------------ - - private static List convertKafkaPartitions( - List> partitions) { - ArrayList result = new ArrayList<>(partitions.size()); - for (KafkaTopicPartitionState p : partitions) { - result.add(p.getKafkaPartitionHandle()); - } - return result; - } - - private class CommitCallback implements OffsetCommitCallback { - - private final KafkaCommitCallback internalCommitCallback; - - CommitCallback(KafkaCommitCallback internalCommitCallback) { - this.internalCommitCallback = checkNotNull(internalCommitCallback); - } - - @Override - public void onComplete(Map offsets, Exception ex) { - commitInProgress = false; - - if (ex != null) { - log.warn( - "Committing offsets to Kafka failed. This does not compromise Flink's checkpoints.", - ex); - internalCommitCallback.onException(ex); - } else { - internalCommitCallback.onSuccess(); - } - } - } - - /** - * Utility exception that serves as a signal for the main loop to continue through the loop if a - * reassignment attempt was aborted due to an pre-reassignment wakeup call on the consumer. 
- */ - private static class AbortedReassignmentException extends Exception { - private static final long serialVersionUID = 1L; - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaDeserializationSchemaWrapper.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaDeserializationSchemaWrapper.java deleted file mode 100644 index b754b4d09..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaDeserializationSchemaWrapper.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.api.common.serialization.DeserializationSchema; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; -import org.apache.flink.util.Collector; - -import org.apache.kafka.clients.consumer.ConsumerRecord; - -/** - * A simple wrapper for using the DeserializationSchema with the KafkaDeserializationSchema - * interface. - * - * @param The type created by the deserialization schema. 
- */ -@Internal -@Deprecated -public class KafkaDeserializationSchemaWrapper implements KafkaDeserializationSchema { - - private static final long serialVersionUID = 2651665280744549932L; - - private final DeserializationSchema deserializationSchema; - - public KafkaDeserializationSchemaWrapper(DeserializationSchema deserializationSchema) { - this.deserializationSchema = deserializationSchema; - } - - @Override - public void open(DeserializationSchema.InitializationContext context) throws Exception { - this.deserializationSchema.open(context); - } - - @Override - public T deserialize(ConsumerRecord record) throws Exception { - throw new UnsupportedOperationException("Should never be called"); - } - - @Override - public void deserialize(ConsumerRecord message, Collector out) - throws Exception { - deserializationSchema.deserialize(message.value(), out); - } - - @Override - public boolean isEndOfStream(T nextElement) { - return deserializationSchema.isEndOfStream(nextElement); - } - - @Override - public TypeInformation getProducedType() { - return deserializationSchema.getProducedType(); - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaFetcher.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaFetcher.java deleted file mode 100644 index 428e6c7ce..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaFetcher.java +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.api.common.eventtime.WatermarkStrategy; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; -import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; -import org.apache.flink.util.Collector; -import org.apache.flink.util.ExceptionUtils; -import org.apache.flink.util.SerializedValue; - -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.apache.kafka.clients.consumer.ConsumerRecords; -import org.apache.kafka.clients.consumer.OffsetAndMetadata; -import org.apache.kafka.common.TopicPartition; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.annotation.Nonnull; - -import java.util.ArrayDeque; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Queue; - -import static org.apache.flink.util.Preconditions.checkState; - -/** - * A fetcher that fetches data from Kafka brokers via the Kafka consumer API. - * - * @param The type of elements produced by the fetcher. - */ -@Internal -@Deprecated -public class KafkaFetcher extends AbstractFetcher { - - private static final Logger LOG = LoggerFactory.getLogger(KafkaFetcher.class); - - // ------------------------------------------------------------------------ - - /** The schema to convert between Kafka's byte messages, and Flink's objects. */ - private final KafkaDeserializationSchema deserializer; - - /** A collector to emit records in batch (bundle). * */ - private final KafkaCollector kafkaCollector; - - /** The handover of data and exceptions between the consumer thread and the task thread. */ - final Handover handover; - - /** - * The thread that runs the actual KafkaConsumer and hand the record batches to this fetcher. - */ - final KafkaConsumerThread consumerThread; - - /** Flag to mark the main work loop as alive. 
*/ - volatile boolean running = true; - - // ------------------------------------------------------------------------ - - public KafkaFetcher( - SourceFunction.SourceContext sourceContext, - Map assignedPartitionsWithInitialOffsets, - SerializedValue> watermarkStrategy, - ProcessingTimeService processingTimeProvider, - long autoWatermarkInterval, - ClassLoader userCodeClassLoader, - String taskNameWithSubtasks, - KafkaDeserializationSchema deserializer, - Properties kafkaProperties, - long pollTimeout, - MetricGroup subtaskMetricGroup, - MetricGroup consumerMetricGroup, - boolean useMetrics) - throws Exception { - super( - sourceContext, - assignedPartitionsWithInitialOffsets, - watermarkStrategy, - processingTimeProvider, - autoWatermarkInterval, - userCodeClassLoader, - consumerMetricGroup, - useMetrics); - - this.deserializer = deserializer; - this.handover = new Handover(); - - this.consumerThread = - new KafkaConsumerThread( - LOG, - handover, - kafkaProperties, - unassignedPartitionsQueue, - getFetcherName() + " for " + taskNameWithSubtasks, - pollTimeout, - useMetrics, - consumerMetricGroup, - subtaskMetricGroup); - this.kafkaCollector = new KafkaCollector(); - } - - // ------------------------------------------------------------------------ - // Fetcher work methods - // ------------------------------------------------------------------------ - - @Override - public void runFetchLoop() throws Exception { - try { - // kick off the actual Kafka consumer - consumerThread.start(); - - while (running) { - // this blocks until we get the next records - // it automatically re-throws exceptions encountered in the consumer thread - final ConsumerRecords records = handover.pollNext(); - - // get the records for each topic partition - for (KafkaTopicPartitionState partition : - subscribedPartitionStates()) { - - List> partitionRecords = - records.records(partition.getKafkaPartitionHandle()); - - partitionConsumerRecordsHandler(partitionRecords, partition); - } - } - } catch (Handover.ClosedException ex) { - if (running) { - // rethrow, only if we are running, if fetcher is not running we should not throw - // the ClosedException, as we are stopping gracefully - ExceptionUtils.rethrowException(ex); - } - } finally { - // this signals the consumer thread that no more work is to be done - consumerThread.shutdown(); - } - - // on a clean exit, wait for the runner thread - try { - consumerThread.join(); - } catch (InterruptedException e) { - // may be the result of a wake-up interruption after an exception. - // we ignore this here and only restore the interruption state - Thread.currentThread().interrupt(); - } - } - - @Override - public void cancel() { - // flag the main thread to exit. A thread interrupt will come anyways. - running = false; - handover.close(); - consumerThread.shutdown(); - } - - /** Gets the name of this fetcher, for thread naming and logging purposes. */ - protected String getFetcherName() { - return "Kafka Fetcher"; - } - - protected void partitionConsumerRecordsHandler( - List> partitionRecords, - KafkaTopicPartitionState partition) - throws Exception { - - for (ConsumerRecord record : partitionRecords) { - deserializer.deserialize(record, kafkaCollector); - - // emit the actual records. 
this also updates offset state atomically and emits - // watermarks - emitRecordsWithTimestamps( - kafkaCollector.getRecords(), partition, record.offset(), record.timestamp()); - - if (kafkaCollector.isEndOfStreamSignalled()) { - // end of stream signaled - running = false; - break; - } - } - } - - // ------------------------------------------------------------------------ - // Implement Methods of the AbstractFetcher - // ------------------------------------------------------------------------ - - @Override - public TopicPartition createKafkaPartitionHandle(KafkaTopicPartition partition) { - return new TopicPartition(partition.getTopic(), partition.getPartition()); - } - - @Override - protected void doCommitInternalOffsetsToKafka( - Map offsets, @Nonnull KafkaCommitCallback commitCallback) - throws Exception { - - @SuppressWarnings("unchecked") - List> partitions = subscribedPartitionStates(); - - Map offsetsToCommit = new HashMap<>(partitions.size()); - - for (KafkaTopicPartitionState partition : partitions) { - Long lastProcessedOffset = offsets.get(partition.getKafkaTopicPartition()); - if (lastProcessedOffset != null) { - checkState(lastProcessedOffset >= 0, "Illegal offset value to commit"); - - // committed offsets through the KafkaConsumer need to be 1 more than the last - // processed offset. - // This does not affect Flink's checkpoints/saved state. - long offsetToCommit = lastProcessedOffset + 1; - - offsetsToCommit.put( - partition.getKafkaPartitionHandle(), new OffsetAndMetadata(offsetToCommit)); - partition.setCommittedOffset(offsetToCommit); - } - } - - // record the work to be committed by the main consumer thread and make sure the consumer - // notices that - consumerThread.setOffsetsToCommit(offsetsToCommit, commitCallback); - } - - private class KafkaCollector implements Collector { - private final Queue records = new ArrayDeque<>(); - - private boolean endOfStreamSignalled = false; - - @Override - public void collect(T record) { - // do not emit subsequent elements if the end of the stream reached - if (endOfStreamSignalled || deserializer.isEndOfStream(record)) { - endOfStreamSignalled = true; - return; - } - records.add(record); - } - - public Queue getRecords() { - return records; - } - - public boolean isEndOfStreamSignalled() { - return endOfStreamSignalled; - } - - @Override - public void close() {} - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaPartitionDiscoverer.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaPartitionDiscoverer.java deleted file mode 100644 index ef7162bde..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaPartitionDiscoverer.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; - -import org.apache.kafka.clients.consumer.KafkaConsumer; -import org.apache.kafka.common.PartitionInfo; - -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.Properties; - -import static org.apache.flink.util.Preconditions.checkNotNull; - -/** - * A partition discoverer that can be used to discover topics and partitions metadata from Kafka - * brokers via the Kafka high-level consumer API. - */ -@Internal -@Deprecated -public class KafkaPartitionDiscoverer extends AbstractPartitionDiscoverer { - - private final Properties kafkaProperties; - - private KafkaConsumer kafkaConsumer; - - public KafkaPartitionDiscoverer( - KafkaTopicsDescriptor topicsDescriptor, - int indexOfThisSubtask, - int numParallelSubtasks, - Properties kafkaProperties) { - - super(topicsDescriptor, indexOfThisSubtask, numParallelSubtasks); - this.kafkaProperties = checkNotNull(kafkaProperties); - } - - @Override - protected void initializeConnections() { - this.kafkaConsumer = new KafkaConsumer<>(kafkaProperties); - } - - @Override - protected List getAllTopics() throws AbstractPartitionDiscoverer.WakeupException { - try { - return new ArrayList<>(kafkaConsumer.listTopics().keySet()); - } catch (org.apache.kafka.common.errors.WakeupException e) { - // rethrow our own wakeup exception - throw new AbstractPartitionDiscoverer.WakeupException(); - } - } - - @Override - protected List getAllPartitionsForTopics(List topics) - throws WakeupException, RuntimeException { - final List partitions = new LinkedList<>(); - - try { - for (String topic : topics) { - final List kafkaPartitions = kafkaConsumer.partitionsFor(topic); - - if (kafkaPartitions == null) { - throw new RuntimeException( - String.format( - "Could not fetch partitions for %s. Make sure that the topic exists.", - topic)); - } - - for (PartitionInfo partitionInfo : kafkaPartitions) { - partitions.add( - new KafkaTopicPartition( - partitionInfo.topic(), partitionInfo.partition())); - } - } - } catch (org.apache.kafka.common.errors.WakeupException e) { - // rethrow our own wakeup exception - throw new WakeupException(); - } - - return partitions; - } - - @Override - protected void wakeupConnections() { - if (this.kafkaConsumer != null) { - this.kafkaConsumer.wakeup(); - } - } - - @Override - protected void closeConnections() throws Exception { - if (this.kafkaConsumer != null) { - this.kafkaConsumer.close(); - - // de-reference the consumer to avoid closing multiple times - this.kafkaConsumer = null; - } - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaSerializationSchemaWrapper.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaSerializationSchemaWrapper.java deleted file mode 100644 index 147fad9b6..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaSerializationSchemaWrapper.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.api.common.serialization.SerializationSchema; -import org.apache.flink.streaming.connectors.kafka.KafkaContextAware; -import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; - -import org.apache.kafka.clients.producer.ProducerRecord; - -import javax.annotation.Nullable; - -/** - * An adapter from old style interfaces such as {@link - * org.apache.flink.api.common.serialization.SerializationSchema}, {@link - * org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner} to the {@link - * KafkaSerializationSchema}. - */ -@Internal -@Deprecated -public class KafkaSerializationSchemaWrapper - implements KafkaSerializationSchema, KafkaContextAware { - - private final FlinkKafkaPartitioner partitioner; - private final SerializationSchema serializationSchema; - private final String topic; - private boolean writeTimestamp; - - private int[] partitions; - private int parallelInstanceId; - private int numParallelInstances; - - public KafkaSerializationSchemaWrapper( - String topic, - FlinkKafkaPartitioner partitioner, - boolean writeTimestamp, - SerializationSchema serializationSchema) { - this.partitioner = partitioner; - this.serializationSchema = serializationSchema; - this.topic = topic; - this.writeTimestamp = writeTimestamp; - } - - @Override - public void open(SerializationSchema.InitializationContext context) throws Exception { - serializationSchema.open(context); - if (partitioner != null) { - partitioner.open(parallelInstanceId, numParallelInstances); - } - } - - @Override - public ProducerRecord serialize(T element, @Nullable Long timestamp) { - byte[] serialized = serializationSchema.serialize(element); - final Integer partition; - if (partitioner != null) { - partition = partitioner.partition(element, null, serialized, topic, partitions); - } else { - partition = null; - } - - final Long timestampToWrite; - if (writeTimestamp) { - timestampToWrite = timestamp; - } else { - timestampToWrite = null; - } - - return new ProducerRecord<>(topic, partition, timestampToWrite, null, serialized); - } - - @Override - public String getTargetTopic(T element) { - return topic; - } - - @Override - public void setPartitions(int[] partitions) { - this.partitions = partitions; - } - - @Override - public void setParallelInstanceId(int parallelInstanceId) { - this.parallelInstanceId = parallelInstanceId; - } - - @Override - public void setNumParallelInstances(int numParallelInstances) { - this.numParallelInstances = numParallelInstances; - } - - public void setWriteTimestamp(boolean writeTimestamp) { - this.writeTimestamp = writeTimestamp; - } -} diff --git 
a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaShuffleFetcher.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaShuffleFetcher.java deleted file mode 100644 index c61db83f0..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaShuffleFetcher.java +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.annotation.VisibleForTesting; -import org.apache.flink.api.common.eventtime.WatermarkStrategy; -import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.api.common.typeutils.base.ByteSerializer; -import org.apache.flink.api.common.typeutils.base.IntSerializer; -import org.apache.flink.api.common.typeutils.base.LongSerializer; -import org.apache.flink.core.memory.DataInputDeserializer; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.api.watermark.Watermark; -import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; -import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; -import org.apache.flink.util.Preconditions; -import org.apache.flink.util.SerializedValue; - -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.apache.kafka.common.TopicPartition; - -import java.io.Serializable; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Properties; - -import static org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffleProducer.KafkaSerializer.TAG_REC_WITHOUT_TIMESTAMP; -import static org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffleProducer.KafkaSerializer.TAG_REC_WITH_TIMESTAMP; -import static org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffleProducer.KafkaSerializer.TAG_WATERMARK; - -/** Fetch data from Kafka for Kafka Shuffle. */ -@Internal -@Deprecated -public class KafkaShuffleFetcher extends KafkaFetcher { - /** The handler to check and generate watermarks from fetched records. * */ - private final WatermarkHandler watermarkHandler; - - /** The schema to convert between Kafka's byte messages, and Flink's objects. 
*/ - private final KafkaShuffleElementDeserializer kafkaShuffleDeserializer; - - public KafkaShuffleFetcher( - SourceFunction.SourceContext sourceContext, - Map assignedPartitionsWithInitialOffsets, - SerializedValue> watermarkStrategy, - ProcessingTimeService processingTimeProvider, - long autoWatermarkInterval, - ClassLoader userCodeClassLoader, - String taskNameWithSubtasks, - KafkaDeserializationSchema deserializer, - Properties kafkaProperties, - long pollTimeout, - MetricGroup subtaskMetricGroup, - MetricGroup consumerMetricGroup, - boolean useMetrics, - TypeSerializer typeSerializer, - int producerParallelism) - throws Exception { - super( - sourceContext, - assignedPartitionsWithInitialOffsets, - watermarkStrategy, - processingTimeProvider, - autoWatermarkInterval, - userCodeClassLoader, - taskNameWithSubtasks, - deserializer, - kafkaProperties, - pollTimeout, - subtaskMetricGroup, - consumerMetricGroup, - useMetrics); - - this.kafkaShuffleDeserializer = new KafkaShuffleElementDeserializer<>(typeSerializer); - this.watermarkHandler = new WatermarkHandler(producerParallelism); - } - - @Override - protected String getFetcherName() { - return "Kafka Shuffle Fetcher"; - } - - @Override - protected void partitionConsumerRecordsHandler( - List> partitionRecords, - KafkaTopicPartitionState partition) - throws Exception { - - for (ConsumerRecord record : partitionRecords) { - final KafkaShuffleElement element = kafkaShuffleDeserializer.deserialize(record); - - // TODO: Do we need to check the end of stream if reaching the end watermark - // TODO: Currently, if one of the partition sends an end-of-stream signal the fetcher - // stops running. - // The current "ending of stream" logic in KafkaFetcher a bit strange: if any partition - // has a record - // signaled as "END_OF_STREAM", the fetcher will stop running. Notice that the signal is - // coming from - // the deserializer, which means from Kafka data itself. But it is possible that other - // topics - // and partitions still have data to read. Finishing reading Partition0 can not - // guarantee that Partition1 - // also finishes. - if (element.isRecord()) { - // timestamp is inherent from upstream - // If using ProcessTime, timestamp is going to be ignored (upstream does not include - // timestamp as well) - // If using IngestionTime, timestamp is going to be overwritten - // If using EventTime, timestamp is going to be used - synchronized (checkpointLock) { - KafkaShuffleRecord elementAsRecord = element.asRecord(); - sourceContext.collectWithTimestamp( - elementAsRecord.value, - elementAsRecord.timestamp == null - ? record.timestamp() - : elementAsRecord.timestamp); - partition.setOffset(record.offset()); - } - } else if (element.isWatermark()) { - final KafkaShuffleWatermark watermark = element.asWatermark(); - Optional newWatermark = - watermarkHandler.checkAndGetNewWatermark(watermark); - newWatermark.ifPresent(sourceContext::emitWatermark); - } - } - } - - /** An element in a KafkaShuffle. Can be a record or a Watermark. */ - @VisibleForTesting - public abstract static class KafkaShuffleElement { - - public boolean isRecord() { - return getClass() == KafkaShuffleRecord.class; - } - - public boolean isWatermark() { - return getClass() == KafkaShuffleWatermark.class; - } - - public KafkaShuffleRecord asRecord() { - return (KafkaShuffleRecord) this; - } - - public KafkaShuffleWatermark asWatermark() { - return (KafkaShuffleWatermark) this; - } - } - - /** - * A watermark element in a KafkaShuffle. 
It includes - subtask index where the watermark is - * coming from - watermark timestamp - */ - @VisibleForTesting - public static class KafkaShuffleWatermark extends KafkaShuffleElement { - final int subtask; - final long watermark; - - KafkaShuffleWatermark(int subtask, long watermark) { - this.subtask = subtask; - this.watermark = watermark; - } - - public int getSubtask() { - return subtask; - } - - public long getWatermark() { - return watermark; - } - } - - /** - * One value with Type T in a KafkaShuffle. This stores the value and an optional associated - * timestamp. - */ - @VisibleForTesting - public static class KafkaShuffleRecord extends KafkaShuffleElement { - final T value; - final Long timestamp; - - KafkaShuffleRecord(T value) { - this.value = value; - this.timestamp = null; - } - - KafkaShuffleRecord(long timestamp, T value) { - this.value = value; - this.timestamp = timestamp; - } - - public T getValue() { - return value; - } - - public Long getTimestamp() { - return timestamp; - } - } - - /** Deserializer for KafkaShuffleElement. */ - @VisibleForTesting - public static class KafkaShuffleElementDeserializer implements Serializable { - private static final long serialVersionUID = 1000001L; - - private final TypeSerializer typeSerializer; - - private transient DataInputDeserializer dis; - - @VisibleForTesting - public KafkaShuffleElementDeserializer(TypeSerializer typeSerializer) { - this.typeSerializer = typeSerializer; - } - - @VisibleForTesting - public KafkaShuffleElement deserialize(ConsumerRecord record) - throws Exception { - byte[] value = record.value(); - - if (dis != null) { - dis.setBuffer(value); - } else { - dis = new DataInputDeserializer(value); - } - - // version byte - ByteSerializer.INSTANCE.deserialize(dis); - int tag = ByteSerializer.INSTANCE.deserialize(dis); - - if (tag == TAG_REC_WITHOUT_TIMESTAMP) { - return new KafkaShuffleRecord<>(typeSerializer.deserialize(dis)); - } else if (tag == TAG_REC_WITH_TIMESTAMP) { - return new KafkaShuffleRecord<>( - LongSerializer.INSTANCE.deserialize(dis), typeSerializer.deserialize(dis)); - } else if (tag == TAG_WATERMARK) { - return new KafkaShuffleWatermark( - IntSerializer.INSTANCE.deserialize(dis), - LongSerializer.INSTANCE.deserialize(dis)); - } - - throw new UnsupportedOperationException("Unsupported tag format"); - } - } - - /** WatermarkHandler to check and generate watermarks from fetched records. 
*/ - private static class WatermarkHandler { - private final int producerParallelism; - private final Map subtaskWatermark; - - private long currentMinWatermark = Long.MIN_VALUE; - - WatermarkHandler(int producerParallelism) { - this.producerParallelism = producerParallelism; - this.subtaskWatermark = new HashMap<>(producerParallelism); - } - - private Optional checkAndGetNewWatermark(KafkaShuffleWatermark newWatermark) { - // watermarks is incremental for the same partition and PRODUCER subtask - Long currentSubTaskWatermark = subtaskWatermark.get(newWatermark.subtask); - - // watermark is strictly increasing - Preconditions.checkState( - (currentSubTaskWatermark == null) - || (currentSubTaskWatermark < newWatermark.watermark), - "Watermark should always increase: current : new " - + currentSubTaskWatermark - + ":" - + newWatermark.watermark); - - subtaskWatermark.put(newWatermark.subtask, newWatermark.watermark); - - if (subtaskWatermark.values().size() < producerParallelism) { - return Optional.empty(); - } - - long minWatermark = - subtaskWatermark.values().stream() - .min(Comparator.naturalOrder()) - .orElse(Long.MIN_VALUE); - if (currentMinWatermark < minWatermark) { - currentMinWatermark = minWatermark; - return Optional.of(new Watermark(minWatermark)); - } else { - return Optional.empty(); - } - } - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartition.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartition.java deleted file mode 100644 index 0e91042f6..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartition.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.PublicEvolving; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import static java.util.Objects.requireNonNull; - -/** - * Flink's description of a partition in a Kafka topic. Serializable, and common across all Kafka - * consumer subclasses (0.8, 0.9, ...) - * - *

    Note: This class must not change in its structure, because it would change the serialization - * format and make previous savepoints unreadable. - * - * @deprecated Will be turned into internal class when {@link - * org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer} is removed. Replace with - * {@link org.apache.kafka.common.TopicPartition}. - */ -@PublicEvolving -@Deprecated -public final class KafkaTopicPartition implements Serializable { - - /** - * THIS SERIAL VERSION UID MUST NOT CHANGE, BECAUSE IT WOULD BREAK READING OLD SERIALIZED - * INSTANCES FROM SAVEPOINTS. - */ - private static final long serialVersionUID = 722083576322742325L; - - // ------------------------------------------------------------------------ - - private final String topic; - private final int partition; - private final int cachedHash; - - public KafkaTopicPartition(String topic, int partition) { - this.topic = requireNonNull(topic); - this.partition = partition; - this.cachedHash = 31 * topic.hashCode() + partition; - } - - // ------------------------------------------------------------------------ - - public String getTopic() { - return topic; - } - - public int getPartition() { - return partition; - } - - // ------------------------------------------------------------------------ - - @Override - public String toString() { - return "KafkaTopicPartition{" + "topic='" + topic + '\'' + ", partition=" + partition + '}'; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } else if (o instanceof KafkaTopicPartition) { - KafkaTopicPartition that = (KafkaTopicPartition) o; - return this.partition == that.partition && this.topic.equals(that.topic); - } else { - return false; - } - } - - @Override - public int hashCode() { - return cachedHash; - } - - // ------------------------------------------------------------------------ - // Utilities - // ------------------------------------------------------------------------ - - public static String toString(Map map) { - StringBuilder sb = new StringBuilder(); - for (Map.Entry p : map.entrySet()) { - KafkaTopicPartition ktp = p.getKey(); - sb.append(ktp.getTopic()) - .append(":") - .append(ktp.getPartition()) - .append("=") - .append(p.getValue()) - .append(", "); - } - return sb.toString(); - } - - public static String toString(List partitions) { - StringBuilder sb = new StringBuilder(); - for (KafkaTopicPartition p : partitions) { - sb.append(p.getTopic()).append(":").append(p.getPartition()).append(", "); - } - return sb.toString(); - } - - public static List dropLeaderData( - List partitionInfos) { - List ret = new ArrayList<>(partitionInfos.size()); - for (KafkaTopicPartitionLeader ktpl : partitionInfos) { - ret.add(ktpl.getTopicPartition()); - } - return ret; - } - - /** A {@link java.util.Comparator} for {@link KafkaTopicPartition}s. 
*/ - public static class Comparator implements java.util.Comparator { - @Override - public int compare(KafkaTopicPartition p1, KafkaTopicPartition p2) { - if (!p1.getTopic().equals(p2.getTopic())) { - return p1.getTopic().compareTo(p2.getTopic()); - } else { - return Integer.compare(p1.getPartition(), p2.getPartition()); - } - } - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionAssigner.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionAssigner.java deleted file mode 100644 index 83c7483ff..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionAssigner.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; - -/** Utility for assigning Kafka partitions to consumer subtasks. */ -@Internal -@Deprecated -public class KafkaTopicPartitionAssigner { - - /** - * Returns the index of the target subtask that a specific Kafka partition should be assigned - * to. - * - *

<p>The resulting distribution of partitions of a single topic has the following contract: - * - * <ul> - *   <li>1. Uniformly distributed across subtasks - *   <li>2. Partitions are round-robin distributed (strictly clockwise w.r.t. ascending subtask - *       indices) by using the partition id as the offset from a starting index (i.e., the index - *       of the subtask which partition 0 of the topic will be assigned to, determined using the - *       topic name). - * </ul> - * - *

    The above contract is crucial and cannot be broken. Consumer subtasks rely on this - * contract to locally filter out partitions that it should not subscribe to, guaranteeing that - * all partitions of a single topic will always be assigned to some subtask in a uniformly - * distributed manner. - * - * @param partition the Kafka partition - * @param numParallelSubtasks total number of parallel subtasks - * @return index of the target subtask that the Kafka partition should be assigned to. - */ - public static int assign(KafkaTopicPartition partition, int numParallelSubtasks) { - return assign(partition.getTopic(), partition.getPartition(), numParallelSubtasks); - } - - public static int assign(String topic, int partition, int numParallelSubtasks) { - int startIndex = ((topic.hashCode() * 31) & 0x7FFFFFFF) % numParallelSubtasks; - - // here, the assumption is that the id of Kafka partitions are always ascending - // starting from 0, and therefore can be used directly as the offset clockwise from the - // start index - return (startIndex + partition) % numParallelSubtasks; - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionLeader.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionLeader.java deleted file mode 100644 index 031400d6e..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionLeader.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; - -import org.apache.kafka.common.Node; - -import java.io.Serializable; - -/** - * Serializable Topic Partition info with leader Node information. This class is used at runtime. - */ -@Internal -@Deprecated -public class KafkaTopicPartitionLeader implements Serializable { - - private static final long serialVersionUID = 9145855900303748582L; - - private final int leaderId; - private final int leaderPort; - private final String leaderHost; - private final KafkaTopicPartition topicPartition; - private final int cachedHash; - - public KafkaTopicPartitionLeader(KafkaTopicPartition topicPartition, Node leader) { - this.topicPartition = topicPartition; - if (leader == null) { - this.leaderId = -1; - this.leaderHost = null; - this.leaderPort = -1; - } else { - this.leaderId = leader.id(); - this.leaderPort = leader.port(); - this.leaderHost = leader.host(); - } - int cachedHash = (leader == null) ? 
14 : leader.hashCode(); - this.cachedHash = 31 * cachedHash + topicPartition.hashCode(); - } - - public KafkaTopicPartition getTopicPartition() { - return topicPartition; - } - - public Node getLeader() { - if (this.leaderId == -1) { - return null; - } else { - return new Node(leaderId, leaderHost, leaderPort); - } - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof KafkaTopicPartitionLeader)) { - return false; - } - - KafkaTopicPartitionLeader that = (KafkaTopicPartitionLeader) o; - - if (!topicPartition.equals(that.topicPartition)) { - return false; - } - return leaderId == that.leaderId - && leaderPort == that.leaderPort - && leaderHost.equals(that.leaderHost); - } - - @Override - public int hashCode() { - return cachedHash; - } - - @Override - public String toString() { - return "KafkaTopicPartitionLeader{" - + "leaderId=" - + leaderId - + ", leaderPort=" - + leaderPort - + ", leaderHost='" - + leaderHost - + '\'' - + ", topic=" - + topicPartition.getTopic() - + ", partition=" - + topicPartition.getPartition() - + '}'; - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionState.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionState.java deleted file mode 100644 index ee669e7e1..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionState.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; - -/** - * The state that the Flink Kafka Consumer holds for each Kafka partition. Includes the Kafka - * descriptor for partitions. - * - *
    This class describes the most basic state (only the offset), subclasses define more elaborate - * state, containing current watermarks and timestamp extractors. - * - * @param The type of the Kafka partition descriptor, which varies across Kafka versions. - */ -@Internal -@Deprecated -public class KafkaTopicPartitionState { - - // ------------------------------------------------------------------------ - - /** The Flink description of a Kafka partition. */ - private final KafkaTopicPartition partition; - - /** The Kafka description of a Kafka partition (varies across different Kafka versions). */ - private final KPH kafkaPartitionHandle; - - /** The offset within the Kafka partition that we already processed. */ - private volatile long offset; - - /** The offset of the Kafka partition that has been committed. */ - private volatile long committedOffset; - - // ------------------------------------------------------------------------ - - public KafkaTopicPartitionState(KafkaTopicPartition partition, KPH kafkaPartitionHandle) { - this.partition = partition; - this.kafkaPartitionHandle = kafkaPartitionHandle; - this.offset = KafkaTopicPartitionStateSentinel.OFFSET_NOT_SET; - this.committedOffset = KafkaTopicPartitionStateSentinel.OFFSET_NOT_SET; - } - - // ------------------------------------------------------------------------ - - /** - * Gets Flink's descriptor for the Kafka Partition. - * - * @return The Flink partition descriptor. - */ - public final KafkaTopicPartition getKafkaTopicPartition() { - return partition; - } - - /** - * Gets Kafka's descriptor for the Kafka Partition. - * - * @return The Kafka partition descriptor. - */ - public final KPH getKafkaPartitionHandle() { - return kafkaPartitionHandle; - } - - public final String getTopic() { - return partition.getTopic(); - } - - public final int getPartition() { - return partition.getPartition(); - } - - /** - * The current offset in the partition. This refers to the offset last element that we retrieved - * and emitted successfully. It is the offset that should be stored in a checkpoint. - */ - public final long getOffset() { - return offset; - } - - public final void setOffset(long offset) { - this.offset = offset; - } - - public final boolean isOffsetDefined() { - return offset != KafkaTopicPartitionStateSentinel.OFFSET_NOT_SET; - } - - public final void setCommittedOffset(long offset) { - this.committedOffset = offset; - } - - public final long getCommittedOffset() { - return committedOffset; - } - - public long extractTimestamp(T record, long kafkaEventTimestamp) { - return kafkaEventTimestamp; - } - - public void onEvent(T event, long timestamp) { - // do nothing - } - - public void onPeriodicEmit() { - // do nothing - } - - // ------------------------------------------------------------------------ - - @Override - public String toString() { - return "Partition: " - + partition - + ", KafkaPartitionHandle=" - + kafkaPartitionHandle - + ", offset=" - + (isOffsetDefined() ? 
String.valueOf(offset) : "(not set)"); - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionStateWithWatermarkGenerator.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionStateWithWatermarkGenerator.java deleted file mode 100644 index f9c815fcc..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionStateWithWatermarkGenerator.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.api.common.eventtime.TimestampAssigner; -import org.apache.flink.api.common.eventtime.WatermarkGenerator; -import org.apache.flink.api.common.eventtime.WatermarkOutput; - -/** - * A special version of the per-kafka-partition-state that additionally holds a {@link - * TimestampAssigner}, {@link WatermarkGenerator}, an immediate {@link WatermarkOutput}, and a - * deferred {@link WatermarkOutput} for this partition. - * - *
    See {@link org.apache.flink.api.common.eventtime.WatermarkOutputMultiplexer} for an - * explanation of immediate and deferred {@link WatermarkOutput WatermarkOutputs.}. - * - * @param The type of records handled by the watermark generator - * @param The type of the Kafka partition descriptor, which varies across Kafka versions. - */ -@Internal -@Deprecated -public final class KafkaTopicPartitionStateWithWatermarkGenerator - extends KafkaTopicPartitionState { - - private final TimestampAssigner timestampAssigner; - - private final WatermarkGenerator watermarkGenerator; - - /** - * Refer to {@link org.apache.flink.api.common.eventtime.WatermarkOutputMultiplexer} for a - * description of immediate/deferred output. - */ - private final WatermarkOutput immediateOutput; - - /** - * Refer to {@link org.apache.flink.api.common.eventtime.WatermarkOutputMultiplexer} for a - * description of immediate/deferred output. - */ - private final WatermarkOutput deferredOutput; - - // ------------------------------------------------------------------------ - - public KafkaTopicPartitionStateWithWatermarkGenerator( - KafkaTopicPartition partition, - KPH kafkaPartitionHandle, - TimestampAssigner timestampAssigner, - WatermarkGenerator watermarkGenerator, - WatermarkOutput immediateOutput, - WatermarkOutput deferredOutput) { - super(partition, kafkaPartitionHandle); - - this.timestampAssigner = timestampAssigner; - this.watermarkGenerator = watermarkGenerator; - this.immediateOutput = immediateOutput; - this.deferredOutput = deferredOutput; - } - - // ------------------------------------------------------------------------ - - @Override - public long extractTimestamp(T record, long kafkaEventTimestamp) { - return timestampAssigner.extractTimestamp(record, kafkaEventTimestamp); - } - - @Override - public void onEvent(T event, long timestamp) { - watermarkGenerator.onEvent(event, timestamp, immediateOutput); - } - - @Override - public void onPeriodicEmit() { - watermarkGenerator.onPeriodicEmit(deferredOutput); - } - - // ------------------------------------------------------------------------ - - @Override - public String toString() { - return "KafkaTopicPartitionStateWithPeriodicWatermarks: partition=" - + getKafkaTopicPartition() - + ", offset=" - + getOffset(); - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicsDescriptor.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicsDescriptor.java deleted file mode 100644 index 4bb37b1c2..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicsDescriptor.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; - -import javax.annotation.Nullable; - -import java.io.Serializable; -import java.util.List; -import java.util.regex.Pattern; - -import static org.apache.flink.util.Preconditions.checkArgument; - -/** - * A Kafka Topics Descriptor describes how the consumer subscribes to Kafka topics - either a fixed - * list of topics, or a topic pattern. - */ -@Internal -@Deprecated -public class KafkaTopicsDescriptor implements Serializable { - - private static final long serialVersionUID = -3807227764764900975L; - - private final List fixedTopics; - private final Pattern topicPattern; - - public KafkaTopicsDescriptor( - @Nullable List fixedTopics, @Nullable Pattern topicPattern) { - checkArgument( - (fixedTopics != null && topicPattern == null) - || (fixedTopics == null && topicPattern != null), - "Exactly one of either fixedTopics or topicPattern must be specified."); - - if (fixedTopics != null) { - checkArgument( - !fixedTopics.isEmpty(), - "If subscribing to a fixed topics list, the supplied list cannot be empty."); - } - - this.fixedTopics = fixedTopics; - this.topicPattern = topicPattern; - } - - public boolean isFixedTopics() { - return fixedTopics != null; - } - - public boolean isTopicPattern() { - return topicPattern != null; - } - - /** - * Check if the input topic matches the topics described by this KafkaTopicDescriptor. - * - * @return true if found a match. - */ - public boolean isMatchingTopic(String topic) { - if (isFixedTopics()) { - return getFixedTopics().contains(topic); - } else { - return topicPattern.matcher(topic).matches(); - } - } - - public List getFixedTopics() { - return fixedTopics; - } - - @Override - public String toString() { - return (fixedTopics == null) - ? "Topic Regex Pattern (" + topicPattern.pattern() + ")" - : "Fixed Topics (" + fixedTopics + ")"; - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KeyedSerializationSchemaWrapper.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KeyedSerializationSchemaWrapper.java deleted file mode 100644 index ae4e922d5..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/KeyedSerializationSchemaWrapper.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.api.common.serialization.SerializationSchema; -import org.apache.flink.streaming.util.serialization.KeyedSerializationSchema; - -/** - * A simple wrapper for using the SerializationSchema with the KeyedSerializationSchema interface. - * - * @param The type to serialize - */ -@Internal -@Deprecated -public class KeyedSerializationSchemaWrapper implements KeyedSerializationSchema { - - private static final long serialVersionUID = 1351665280744549933L; - - private final SerializationSchema serializationSchema; - - public KeyedSerializationSchemaWrapper(SerializationSchema serializationSchema) { - this.serializationSchema = serializationSchema; - } - - public SerializationSchema getSerializationSchema() { - return serializationSchema; - } - - @Override - public byte[] serializeKey(T element) { - return null; - } - - @Override - public byte[] serializeValue(T element) { - return serializationSchema.serialize(element); - } - - @Override - public String getTargetTopic(T element) { - return null; // we are never overriding the topic - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/SourceContextWatermarkOutputAdapter.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/SourceContextWatermarkOutputAdapter.java deleted file mode 100644 index 68c4db12a..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/SourceContextWatermarkOutputAdapter.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.api.common.eventtime.Watermark; -import org.apache.flink.api.common.eventtime.WatermarkOutput; -import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext; - -/** - * A {@link org.apache.flink.api.common.eventtime.WatermarkOutput} that forwards calls to a {@link - * org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext}. 
- */ -@Deprecated -public class SourceContextWatermarkOutputAdapter implements WatermarkOutput { - private final SourceContext sourceContext; - - public SourceContextWatermarkOutputAdapter(SourceContext sourceContext) { - this.sourceContext = sourceContext; - } - - @Override - public void emitWatermark(Watermark watermark) { - sourceContext.emitWatermark( - new org.apache.flink.streaming.api.watermark.Watermark(watermark.getTimestamp())); - } - - @Override - public void markIdle() { - sourceContext.markAsTemporarilyIdle(); - } - - @Override - public void markActive() { - // will be set active with next watermark - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/TransactionalIdsGenerator.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/TransactionalIdsGenerator.java deleted file mode 100644 index cd6270acc..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/TransactionalIdsGenerator.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.annotation.Internal; - -import java.util.HashSet; -import java.util.Set; - -import static org.apache.flink.util.Preconditions.checkArgument; -import static org.apache.flink.util.Preconditions.checkNotNull; - -/** - * Class responsible for generating transactional ids to use when communicating with Kafka. - * - *
    It guarantees that (see the sketch after this list): - * - *
      - *
    • generated ids to use will never clash with ids to use from different subtasks - *
    • generated ids to abort will never clash with ids to abort from different subtasks - *
    • generated ids to use will never clash with ids to abort from different subtasks - *
    - * - *
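The layout behind these guarantees is easiest to see in a small standalone sketch. The following is illustrative only and mirrors the generateIdsToUse / generateIdsToAbort logic of the class being removed below; the prefix "sink", pool size 5, parallelism 3, and safe scale-down factor 2 are assumed example values, not part of this change.

import java.util.HashSet;
import java.util.Set;

// Illustrative only: reproduces the pool-based transactional-id layout so the
// non-overlap guarantees above can be checked by hand.
public class TransactionalIdLayoutSketch {

    static Set<String> idsToUse(String prefix, long nextFreeTransactionalId,
                                int subtaskIndex, int poolSize) {
        Set<String> ids = new HashSet<>();
        for (int i = 0; i < poolSize; i++) {
            // Each subtask owns the contiguous range
            // [nextFree + subtaskIndex * poolSize, nextFree + (subtaskIndex + 1) * poolSize),
            // so ranges of different subtasks never overlap.
            ids.add(prefix + "-" + (nextFreeTransactionalId + subtaskIndex * poolSize + i));
        }
        return ids;
    }

    public static void main(String[] args) {
        int poolSize = 5;
        int parallelism = 3;
        int safeScaleDownFactor = 2;

        for (int subtask = 0; subtask < parallelism; subtask++) {
            System.out.println("subtask " + subtask + " uses: " + idsToUse("sink", 0, subtask, poolSize));
        }

        // Ids a restarted subtask aborts when the previous parallelism is unknown:
        // its own slice of the first safeScaleDownFactor "generations" of id ranges
        // (mirrors generateIdsToAbort below).
        int subtaskIndex = 1; // pick one subtask for the printout
        Set<String> idsToAbort = new HashSet<>();
        for (int i = 0; i < safeScaleDownFactor; i++) {
            idsToAbort.addAll(idsToUse("sink", (long) i * poolSize * parallelism, subtaskIndex, poolSize));
        }
        System.out.println("subtask " + subtaskIndex + " aborts: " + idsToAbort);
    }
}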
    In other words, any particular generated id will always be assigned to one and only one - * subtask. - */ -@Internal -@Deprecated -public class TransactionalIdsGenerator { - private final String prefix; - private final int subtaskIndex; - private final int totalNumberOfSubtasks; - private final int poolSize; - private final int safeScaleDownFactor; - - public TransactionalIdsGenerator( - String prefix, - int subtaskIndex, - int totalNumberOfSubtasks, - int poolSize, - int safeScaleDownFactor) { - checkArgument(subtaskIndex < totalNumberOfSubtasks); - checkArgument(poolSize > 0); - checkArgument(safeScaleDownFactor > 0); - checkArgument(subtaskIndex >= 0); - - this.prefix = checkNotNull(prefix); - this.subtaskIndex = subtaskIndex; - this.totalNumberOfSubtasks = totalNumberOfSubtasks; - this.poolSize = poolSize; - this.safeScaleDownFactor = safeScaleDownFactor; - } - - /** - * Range of available transactional ids to use is: [nextFreeTransactionalId, - * nextFreeTransactionalId + parallelism * kafkaProducersPoolSize) loop below picks in a - * deterministic way a subrange of those available transactional ids based on index of this - * subtask. - */ - public Set generateIdsToUse(long nextFreeTransactionalId) { - Set transactionalIds = new HashSet<>(); - for (int i = 0; i < poolSize; i++) { - long transactionalId = nextFreeTransactionalId + subtaskIndex * poolSize + i; - transactionalIds.add(generateTransactionalId(transactionalId)); - } - return transactionalIds; - } - - /** - * If we have to abort previous transactional id in case of restart after a failure BEFORE first - * checkpoint completed, we don't know what was the parallelism used in previous attempt. In - * that case we must guess the ids range to abort based on current configured pool size, current - * parallelism and safeScaleDownFactor. - */ - public Set generateIdsToAbort() { - Set idsToAbort = new HashSet<>(); - for (int i = 0; i < safeScaleDownFactor; i++) { - idsToAbort.addAll(generateIdsToUse(i * poolSize * totalNumberOfSubtasks)); - } - return idsToAbort; - } - - private String generateTransactionalId(long transactionalId) { - return prefix + "-" + transactionalId; - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/metrics/KafkaConsumerMetricConstants.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/metrics/KafkaConsumerMetricConstants.java deleted file mode 100644 index 731089028..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/internals/metrics/KafkaConsumerMetricConstants.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.connectors.kafka.internals.metrics; - -import org.apache.flink.annotation.Internal; - -/** - * A collection of Kafka consumer metrics related constant strings. - * - *
    The names must not be changed, as that would break backward compatibility for the consumer's - * metrics. - */ -@Internal -@Deprecated -public class KafkaConsumerMetricConstants { - - public static final String KAFKA_CONSUMER_METRICS_GROUP = "KafkaConsumer"; - - // ------------------------------------------------------------------------ - // Per-subtask metrics - // ------------------------------------------------------------------------ - - public static final String COMMITS_SUCCEEDED_METRICS_COUNTER = "commitsSucceeded"; - public static final String COMMITS_FAILED_METRICS_COUNTER = "commitsFailed"; - - // ------------------------------------------------------------------------ - // Per-partition metrics - // ------------------------------------------------------------------------ - - public static final String OFFSETS_BY_TOPIC_METRICS_GROUP = "topic"; - public static final String OFFSETS_BY_PARTITION_METRICS_GROUP = "partition"; - - public static final String CURRENT_OFFSETS_METRICS_GAUGE = "currentOffsets"; - public static final String COMMITTED_OFFSETS_METRICS_GAUGE = "committedOffsets"; - - // ------------------------------------------------------------------------ - // Legacy metrics - // ------------------------------------------------------------------------ - - public static final String LEGACY_CURRENT_OFFSETS_METRICS_GROUP = "current-offsets"; - public static final String LEGACY_COMMITTED_OFFSETS_METRICS_GROUP = "committed-offsets"; -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/partitioner/FlinkFixedPartitioner.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/partitioner/FlinkFixedPartitioner.java index e70baea3b..3cf0ff164 100644 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/partitioner/FlinkFixedPartitioner.java +++ b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/partitioner/FlinkFixedPartitioner.java @@ -18,7 +18,8 @@ package org.apache.flink.streaming.connectors.kafka.partitioner; -import org.apache.flink.annotation.PublicEvolving; +import org.apache.flink.annotation.Internal; +import org.apache.flink.connector.kafka.sink.KafkaPartitioner; import org.apache.flink.util.Preconditions; /** @@ -54,13 +55,9 @@ *
    Not all Kafka partitions contain data To avoid such an unbalanced partitioning, use a * round-robin kafka partitioner (note that this will cause a lot of network connections between all * the Flink instances and all the Kafka brokers). - * - * @deprecated Will be turned into internal class when {@link - * org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer} is removed. */ -@PublicEvolving -@Deprecated -public class FlinkFixedPartitioner extends FlinkKafkaPartitioner { +@Internal +public class FlinkFixedPartitioner implements KafkaPartitioner { private static final long serialVersionUID = -3785320239953858777L; diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/partitioner/FlinkKafkaPartitioner.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/partitioner/FlinkKafkaPartitioner.java deleted file mode 100644 index 9568349a3..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/partitioner/FlinkKafkaPartitioner.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.partitioner; - -import org.apache.flink.annotation.PublicEvolving; -import org.apache.flink.connector.kafka.sink.KafkaPartitioner; - -/** - * A {@link FlinkKafkaPartitioner} wraps logic on how to partition records across partitions of - * multiple Kafka topics. - * - * @deprecated Use {@link KafkaPartitioner} instead for {@link - * org.apache.flink.connector.kafka.sink.KafkaSink}. - */ -@PublicEvolving -@Deprecated -public abstract class FlinkKafkaPartitioner implements KafkaPartitioner { - - private static final long serialVersionUID = -9086719227828020494L; -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/shuffle/FlinkKafkaShuffle.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/shuffle/FlinkKafkaShuffle.java deleted file mode 100644 index bb7c76a67..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/shuffle/FlinkKafkaShuffle.java +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.shuffle; - -import org.apache.flink.annotation.Experimental; -import org.apache.flink.api.common.operators.Keys; -import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema; -import org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo; -import org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.api.java.functions.KeySelector; -import org.apache.flink.api.java.tuple.Tuple; -import org.apache.flink.runtime.state.KeyGroupRangeAssignment; -import org.apache.flink.streaming.api.TimeCharacteristic; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.datastream.DataStreamUtils; -import org.apache.flink.streaming.api.datastream.KeyedStream; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.api.transformations.LegacySinkTransformation; -import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer; -import org.apache.flink.streaming.util.keys.KeySelectorUtil; -import org.apache.flink.util.Preconditions; -import org.apache.flink.util.PropertiesUtil; - -import java.util.Properties; - -/** - * {@link FlinkKafkaShuffle} uses Kafka as a message bus to shuffle and persist data at the same - * time. - * - *
    Persisting shuffle data is useful when - you would like to reuse the shuffle data and/or, - - * you would like to avoid a full restart of a pipeline during failure recovery - * - *
    Persisting shuffle is achieved by wrapping a {@link FlinkKafkaShuffleProducer} and a {@link - * FlinkKafkaShuffleConsumer} together into a {@link FlinkKafkaShuffle}. Here is an example how to - * use a {@link FlinkKafkaShuffle}. - * - *
- * <pre>{@code
    - * StreamExecutionEnvironment env = ... 					// create execution environment
- * 	DataStream<X> source = env.addSource(...)				// add data stream source
- * 	DataStream<X> dataStream = ...							// some transformation(s) based on source
    - *
- * KeyedStream<X, Y> keyedStream = FlinkKafkaShuffle
    - * 	.persistentKeyBy(									// keyBy shuffle through kafka
    - * 			dataStream,										// data stream to be shuffled
    - * 			topic,											// Kafka topic written to
    - * 			producerParallelism,							// the number of tasks of a Kafka Producer
    - * 			numberOfPartitions,								// the number of partitions of the Kafka topic written to
    - * 			kafkaProperties,								// kafka properties for Kafka Producer and Consumer
    - * 			keySelector);							// key selector to retrieve key from `dataStream'
    - *
    - * keyedStream.transform...								// some other transformation(s)
    - *
- * 	KeyedStream<X, Y> keyedStreamReuse = FlinkKafkaShuffle
    - * 		.readKeyBy(											// Read the Kafka shuffle data again for other usages
    - * 			topic,											// the topic of Kafka where data is persisted
    - * 			env,											// execution environment, and it can be a new environment
    - * 			typeInformation,								// type information of the data persisted in Kafka
    - * 			kafkaProperties,								// kafka properties for Kafka Consumer
    - * 			keySelector);							// key selector to retrieve key
    - *
    - * 	keyedStreamReuse.transform...							// some other transformation(s)
- * }</pre>
    - * - *
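For reference, here is a concrete, hypothetical version of the pseudo-example above, written against the deprecated API that this change removes. The topic name, broker address, parallelism values, and the Tuple2 payload are illustrative assumptions; a running Kafka broker and an exactly-once-capable setup would be needed to actually execute it.

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle;

import java.util.Properties;

// Hypothetical, illustration-only usage of the deprecated shuffle API removed by this change.
public class KafkaShuffleUsageSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(1000); // the shuffle producer writes with EXACTLY_ONCE semantics

        Properties kafkaProperties = new Properties();
        kafkaProperties.setProperty("bootstrap.servers", "localhost:9092"); // assumed broker

        DataStream<Tuple2<String, Integer>> dataStream =
                env.fromElements(Tuple2.of("a", 1), Tuple2.of("b", 2));

        KeySelector<Tuple2<String, Integer>, String> keySelector =
                new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> value) {
                        return value.f0;
                    }
                };

        // keyBy shuffle through Kafka: 2 producer tasks, 4 partitions (== consumer parallelism).
        KeyedStream<Tuple2<String, Integer>, String> keyedStream =
                FlinkKafkaShuffle.persistentKeyBy(
                        dataStream, "shuffle-topic", 2, 4, kafkaProperties, keySelector);
        keyedStream.print();

        // Read the persisted shuffle data again, potentially from a different job.
        TypeInformation<Tuple2<String, Integer>> typeInfo = Types.TUPLE(Types.STRING, Types.INT);
        KeyedStream<Tuple2<String, Integer>, String> keyedStreamReuse =
                FlinkKafkaShuffle.readKeyBy("shuffle-topic", env, typeInfo, kafkaProperties, keySelector);
        keyedStreamReuse.print();

        env.execute("kafka-shuffle-usage-sketch");
    }
}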
    Usage of {@link FlinkKafkaShuffle#persistentKeyBy} is similar to {@link - * DataStream#keyBy(KeySelector)}. The differences are: - * - *
    1). Partitioning is done through {@link FlinkKafkaShuffleProducer}. {@link - * FlinkKafkaShuffleProducer} decides which partition a key goes to when writing to Kafka (see the routing sketch after the diagram below) - * - *
    2). Shuffle data can be reused through {@link FlinkKafkaShuffle#readKeyBy}, as shown in the - * example above. - * - *
    3). Job execution is decoupled by the persistent Kafka message bus. In the example, the job - * execution graph is decoupled to three regions: `KafkaShuffleProducer', `KafkaShuffleConsumer' and - * `KafkaShuffleConsumerReuse' through `PERSISTENT DATA` as shown below. If any region fails the - * execution, the other two keep progressing. - * - *
- * <pre>
    - *     source -> ... KafkaShuffleProducer -> PERSISTENT DATA -> KafkaShuffleConsumer -> ...
    - *                                                |
    - *                                                | ----------> KafkaShuffleConsumerReuse -> ...
- * </pre>
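As a minimal sketch of the routing described in point 1) and pictured above: the producer first asks the key-group assignment which consumer subtask must own a key, then writes to the Kafka partition that maps back to that subtask. The helper below inlines the same arithmetic as the removed KafkaTopicPartitionAssigner#assign and mirrors FlinkKafkaShuffleProducer#getPartitions further down in this diff; the topic name, key, and partition count are made-up values.

import org.apache.flink.runtime.state.KeyGroupRangeAssignment;

// Illustrative only: key -> consumer subtask -> Kafka partition, assuming
// numberOfPartitions == key-group size == consumer parallelism.
public class ShuffleRoutingSketch {

    /** Same arithmetic as KafkaTopicPartitionAssigner#assign in this connector. */
    static int subtaskForPartition(String topic, int partition, int numParallelSubtasks) {
        int startIndex = ((topic.hashCode() * 31) & 0x7FFFFFFF) % numParallelSubtasks;
        return (startIndex + partition) % numParallelSubtasks;
    }

    public static void main(String[] args) {
        String topic = "shuffle-topic"; // assumed topic
        int numberOfPartitions = 4;     // assumed partition count
        String key = "user-42";         // assumed key

        // Producer side: which consumer subtask must see this key (key-group assignment).
        int subtask = KeyGroupRangeAssignment.assignKeyToParallelOperator(
                key, numberOfPartitions, numberOfPartitions);

        // Invert the partition -> subtask mapping so the record is written to the
        // partition that this subtask reads (mirrors FlinkKafkaShuffleProducer#getPartitions).
        int targetPartition = -1;
        for (int partition = 0; partition < numberOfPartitions; partition++) {
            if (subtaskForPartition(topic, partition, numberOfPartitions) == subtask) {
                targetPartition = partition;
                break;
            }
        }
        System.out.println(key + " -> subtask " + subtask + " -> partition " + targetPartition);
    }
}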
    - * - * @deprecated This experimental feature never graduated to a stable feature and will be removed in - * future releases. In case of interest to port it to the Source/Sink API, please reach out to - * the Flink community. - */ -@Experimental -@Deprecated -public class FlinkKafkaShuffle { - static final String PRODUCER_PARALLELISM = "producer parallelism"; - static final String PARTITION_NUMBER = "partition number"; - - /** - * Uses Kafka as a message bus to persist keyBy shuffle. - * - *
    Persisting keyBy shuffle is achieved by wrapping a {@link FlinkKafkaShuffleProducer} and - * {@link FlinkKafkaShuffleConsumer} together. - * - *
    On the producer side, {@link FlinkKafkaShuffleProducer} is similar to {@link - * DataStream#keyBy(KeySelector)}. They use the same key group assignment function {@link - * KeyGroupRangeAssignment#assignKeyToParallelOperator} to decide which partition a key goes. - * Hence, each producer task can potentially write to each Kafka partition based on where the - * key goes. Here, `numberOfPartitions` equals to the key group size. In the case of using - * {@link TimeCharacteristic#EventTime}, each producer task broadcasts its watermark to ALL of - * the Kafka partitions to make sure watermark information is propagated correctly. - * - *
    On the consumer side, each consumer task should read partitions equal to the key group - * indices it is assigned. `numberOfPartitions` is the maximum parallelism of the consumer. This - * version only supports numberOfPartitions = consumerParallelism. In the case of using {@link - * TimeCharacteristic#EventTime}, a consumer task is responsible to emit watermarks. Watermarks - * are read from the corresponding Kafka partitions. Notice that a consumer task only starts to - * emit a watermark after reading at least one watermark from each producer task to make sure - * watermarks are monotonically increasing. Hence a consumer task needs to know - * `producerParallelism` as well. - * - * @see FlinkKafkaShuffle#writeKeyBy - * @see FlinkKafkaShuffle#readKeyBy - * @param dataStream Data stream to be shuffled - * @param topic Kafka topic written to - * @param producerParallelism Parallelism of producer - * @param numberOfPartitions Number of partitions - * @param properties Kafka properties - * @param keySelector Key selector to retrieve key from `dataStream' - * @param Type of the input data stream - * @param Type of key - */ - public static KeyedStream persistentKeyBy( - DataStream dataStream, - String topic, - int producerParallelism, - int numberOfPartitions, - Properties properties, - KeySelector keySelector) { - // KafkaProducer#propsToMap uses Properties purely as a HashMap without considering the - // default properties - // So we have to flatten the default property to first level elements. - Properties kafkaProperties = PropertiesUtil.flatten(properties); - kafkaProperties.setProperty(PRODUCER_PARALLELISM, String.valueOf(producerParallelism)); - kafkaProperties.setProperty(PARTITION_NUMBER, String.valueOf(numberOfPartitions)); - - StreamExecutionEnvironment env = dataStream.getExecutionEnvironment(); - - writeKeyBy(dataStream, topic, kafkaProperties, keySelector); - return readKeyBy(topic, env, dataStream.getType(), kafkaProperties, keySelector); - } - - /** - * Uses Kafka as a message bus to persist keyBy shuffle. - * - *
    Persisting keyBy shuffle is achieved by wrapping a {@link FlinkKafkaShuffleProducer} and - * {@link FlinkKafkaShuffleConsumer} together. - * - *
    On the producer side, {@link FlinkKafkaShuffleProducer} is similar to {@link - * DataStream#keyBy(KeySelector)}. They use the same key group assignment function {@link - * KeyGroupRangeAssignment#assignKeyToParallelOperator} to decide which partition a key goes. - * Hence, each producer task can potentially write to each Kafka partition based on where the - * key goes. Here, `numberOfPartitions` equals to the key group size. In the case of using - * {@link TimeCharacteristic#EventTime}, each producer task broadcasts its watermark to ALL of - * the Kafka partitions to make sure watermark information is propagated correctly. - * - *
    On the consumer side, each consumer task should read partitions equal to the key group - * indices it is assigned. `numberOfPartitions` is the maximum parallelism of the consumer. This - * version only supports numberOfPartitions = consumerParallelism. In the case of using {@link - * TimeCharacteristic#EventTime}, a consumer task is responsible to emit watermarks. Watermarks - * are read from the corresponding Kafka partitions. Notice that a consumer task only starts to - * emit a watermark after reading at least one watermark from each producer task to make sure - * watermarks are monotonically increasing. Hence a consumer task needs to know - * `producerParallelism` as well. - * - * @see FlinkKafkaShuffle#writeKeyBy - * @see FlinkKafkaShuffle#readKeyBy - * @param dataStream Data stream to be shuffled - * @param topic Kafka topic written to - * @param producerParallelism Parallelism of producer - * @param numberOfPartitions Number of partitions - * @param properties Kafka properties - * @param fields Key positions from the input data stream - * @param Type of the input data stream - */ - public static KeyedStream persistentKeyBy( - DataStream dataStream, - String topic, - int producerParallelism, - int numberOfPartitions, - Properties properties, - int... fields) { - return persistentKeyBy( - dataStream, - topic, - producerParallelism, - numberOfPartitions, - properties, - keySelector(dataStream, fields)); - } - - /** - * The write side of {@link FlinkKafkaShuffle#persistentKeyBy}. - * - *
    This function contains a {@link FlinkKafkaShuffleProducer} to shuffle and persist data in - * Kafka. {@link FlinkKafkaShuffleProducer} uses the same key group assignment function {@link - * KeyGroupRangeAssignment#assignKeyToParallelOperator} to decide which partition a key goes. - * Hence, each producer task can potentially write to each Kafka partition based on the key. - * Here, the number of partitions equals to the key group size. In the case of using {@link - * TimeCharacteristic#EventTime}, each producer task broadcasts each watermark to all of the - * Kafka partitions to make sure watermark information is propagated properly. - * - *
    Attention: make sure kafkaProperties include {@link - * FlinkKafkaShuffle#PRODUCER_PARALLELISM} and {@link FlinkKafkaShuffle#PARTITION_NUMBER} - * explicitly. {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} is the parallelism of the - * producer. {@link FlinkKafkaShuffle#PARTITION_NUMBER} is the number of partitions. They are - * not necessarily the same and allowed to be set independently. - * - * @see FlinkKafkaShuffle#persistentKeyBy - * @see FlinkKafkaShuffle#readKeyBy - * @param dataStream Data stream to be shuffled - * @param topic Kafka topic written to - * @param kafkaProperties Kafka properties for Kafka Producer - * @param keySelector Key selector to retrieve key from `dataStream' - * @param Type of the input data stream - * @param Type of key - */ - public static void writeKeyBy( - DataStream dataStream, - String topic, - Properties kafkaProperties, - KeySelector keySelector) { - - StreamExecutionEnvironment env = dataStream.getExecutionEnvironment(); - TypeSerializer typeSerializer = dataStream.getType().createSerializer(env.getConfig()); - - // write data to Kafka - FlinkKafkaShuffleProducer kafkaProducer = - new FlinkKafkaShuffleProducer<>( - topic, - typeSerializer, - kafkaProperties, - env.clean(keySelector), - FlinkKafkaProducer.Semantic.EXACTLY_ONCE, - FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE); - - // make sure the sink parallelism is set to producerParallelism - Preconditions.checkArgument( - kafkaProperties.getProperty(PRODUCER_PARALLELISM) != null, - "Missing producer parallelism for Kafka Shuffle"); - int producerParallelism = - PropertiesUtil.getInt(kafkaProperties, PRODUCER_PARALLELISM, Integer.MIN_VALUE); - - addKafkaShuffle(dataStream, kafkaProducer, producerParallelism); - } - - /** - * The write side of {@link FlinkKafkaShuffle#persistentKeyBy}. - * - *
    This function contains a {@link FlinkKafkaShuffleProducer} to shuffle and persist data in - * Kafka. {@link FlinkKafkaShuffleProducer} uses the same key group assignment function {@link - * KeyGroupRangeAssignment#assignKeyToParallelOperator} to decide which partition a key goes. - * - *
    Hence, each producer task can potentially write to each Kafka partition based on the key. - * Here, the number of partitions equals to the key group size. In the case of using {@link - * TimeCharacteristic#EventTime}, each producer task broadcasts each watermark to all of the - * Kafka partitions to make sure watermark information is propagated properly. - * - *
    Attention: make sure kafkaProperties include {@link - * FlinkKafkaShuffle#PRODUCER_PARALLELISM} and {@link FlinkKafkaShuffle#PARTITION_NUMBER} - * explicitly. {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} is the parallelism of the - * producer. {@link FlinkKafkaShuffle#PARTITION_NUMBER} is the number of partitions. They are - * not necessarily the same and allowed to be set independently. - * - * @see FlinkKafkaShuffle#persistentKeyBy - * @see FlinkKafkaShuffle#readKeyBy - * @param dataStream Data stream to be shuffled - * @param topic Kafka topic written to - * @param kafkaProperties Kafka properties for Kafka Producer - * @param fields Key positions from the input data stream - * @param Type of the input data stream - */ - public static void writeKeyBy( - DataStream dataStream, String topic, Properties kafkaProperties, int... fields) { - writeKeyBy(dataStream, topic, kafkaProperties, keySelector(dataStream, fields)); - } - - /** - * The read side of {@link FlinkKafkaShuffle#persistentKeyBy}. - * - *
    Each consumer task should read kafka partitions equal to the key group indices it is - * assigned. The number of kafka partitions is the maximum parallelism of the consumer. This - * version only supports numberOfPartitions = consumerParallelism. In the case of using {@link - * TimeCharacteristic#EventTime}, a consumer task is responsible to emit watermarks. Watermarks - * are read from the corresponding Kafka partitions. Notice that a consumer task only starts to - * emit a watermark after receiving at least one watermark from each producer task to make sure - * watermarks are monotonically increasing. Hence a consumer task needs to know - * `producerParallelism` as well. - * - *
    Attention: make sure kafkaProperties include {@link - * FlinkKafkaShuffle#PRODUCER_PARALLELISM} and {@link FlinkKafkaShuffle#PARTITION_NUMBER} - * explicitly. {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} is the parallelism of the - * producer. {@link FlinkKafkaShuffle#PARTITION_NUMBER} is the number of partitions. They are - * not necessarily the same and allowed to be set independently. - * - * @see FlinkKafkaShuffle#persistentKeyBy - * @see FlinkKafkaShuffle#writeKeyBy - * @param topic The topic of Kafka where data is persisted - * @param env Execution environment. readKeyBy's environment can be different from writeKeyBy's - * @param typeInformation Type information of the data persisted in Kafka - * @param kafkaProperties kafka properties for Kafka Consumer - * @param keySelector key selector to retrieve key - * @param Schema type - * @param Key type - * @return Keyed data stream - */ - public static KeyedStream readKeyBy( - String topic, - StreamExecutionEnvironment env, - TypeInformation typeInformation, - Properties kafkaProperties, - KeySelector keySelector) { - - TypeSerializer typeSerializer = typeInformation.createSerializer(env.getConfig()); - TypeInformationSerializationSchema schema = - new TypeInformationSerializationSchema<>(typeInformation, typeSerializer); - - SourceFunction kafkaConsumer = - new FlinkKafkaShuffleConsumer<>(topic, schema, typeSerializer, kafkaProperties); - - // TODO: consider situations where numberOfPartitions != consumerParallelism - Preconditions.checkArgument( - kafkaProperties.getProperty(PARTITION_NUMBER) != null, - "Missing partition number for Kafka Shuffle"); - int numberOfPartitions = - PropertiesUtil.getInt(kafkaProperties, PARTITION_NUMBER, Integer.MIN_VALUE); - // Set the parallelism / max parallelism of the keyed stream in consumer side as the number - // of kafka partitions - DataStream outputDataStream = - env.addSource(kafkaConsumer) - .setParallelism(numberOfPartitions) - .setMaxParallelism(numberOfPartitions); - - return DataStreamUtils.reinterpretAsKeyedStream(outputDataStream, keySelector); - } - - /** - * Adds a {@link StreamKafkaShuffleSink} to {@link DataStream}. - * - *
    {@link StreamKafkaShuffleSink} is associated a {@link FlinkKafkaShuffleProducer}. - * - * @param inputStream Input data stream connected to the shuffle - * @param kafkaShuffleProducer Kafka shuffle sink function that can handle both records and - * watermark - * @param producerParallelism The number of tasks writing to the kafka shuffle - */ - private static void addKafkaShuffle( - DataStream inputStream, - FlinkKafkaShuffleProducer kafkaShuffleProducer, - int producerParallelism) { - - // read the output type of the input Transform to coax out errors about MissingTypeInfo - inputStream.getTransformation().getOutputType(); - - StreamKafkaShuffleSink shuffleSinkOperator = - new StreamKafkaShuffleSink<>(kafkaShuffleProducer); - LegacySinkTransformation transformation = - new LegacySinkTransformation<>( - inputStream.getTransformation(), - "kafka_shuffle", - shuffleSinkOperator, - inputStream.getExecutionEnvironment().getParallelism(), - false); - inputStream.getExecutionEnvironment().addOperator(transformation); - transformation.setParallelism(producerParallelism); - } - - // A better place to put this function is DataStream; but put it here for now to avoid changing - // DataStream - private static KeySelector keySelector(DataStream source, int... fields) { - KeySelector keySelector; - if (source.getType() instanceof BasicArrayTypeInfo - || source.getType() instanceof PrimitiveArrayTypeInfo) { - keySelector = KeySelectorUtil.getSelectorForArray(fields, source.getType()); - } else { - Keys keys = new Keys.ExpressionKeys<>(fields, source.getType()); - keySelector = - KeySelectorUtil.getSelectorForKeys( - keys, source.getType(), source.getExecutionEnvironment().getConfig()); - } - - return keySelector; - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/shuffle/FlinkKafkaShuffleConsumer.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/shuffle/FlinkKafkaShuffleConsumer.java deleted file mode 100644 index b96e9c0f5..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/shuffle/FlinkKafkaShuffleConsumer.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.connectors.kafka.shuffle; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.api.common.eventtime.WatermarkStrategy; -import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema; -import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; -import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; -import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; -import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; -import org.apache.flink.util.Preconditions; -import org.apache.flink.util.PropertiesUtil; -import org.apache.flink.util.SerializedValue; - -import java.util.Map; -import java.util.Properties; - -import static org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle.PRODUCER_PARALLELISM; - -/** Flink Kafka Shuffle Consumer Function. */ -@Internal -@Deprecated -public class FlinkKafkaShuffleConsumer extends FlinkKafkaConsumer { - private final TypeSerializer typeSerializer; - private final int producerParallelism; - - FlinkKafkaShuffleConsumer( - String topic, - TypeInformationSerializationSchema schema, - TypeSerializer typeSerializer, - Properties props) { - // The schema is needed to call the right FlinkKafkaConsumer constructor. - // It is never used, can be `null`, but `null` confuses the compiler. - super(topic, schema, props); - this.typeSerializer = typeSerializer; - - Preconditions.checkArgument( - props.getProperty(PRODUCER_PARALLELISM) != null, - "Missing producer parallelism for Kafka Shuffle"); - producerParallelism = PropertiesUtil.getInt(props, PRODUCER_PARALLELISM, Integer.MAX_VALUE); - } - - @Override - protected AbstractFetcher createFetcher( - SourceContext sourceContext, - Map assignedPartitionsWithInitialOffsets, - SerializedValue> watermarkStrategy, - StreamingRuntimeContext runtimeContext, - OffsetCommitMode offsetCommitMode, - MetricGroup consumerMetricGroup, - boolean useMetrics) - throws Exception { - // make sure that auto commit is disabled when our offset commit mode is ON_CHECKPOINTS; - // this overwrites whatever setting the user configured in the properties - adjustAutoCommitConfig(properties, offsetCommitMode); - - return new KafkaShuffleFetcher<>( - sourceContext, - assignedPartitionsWithInitialOffsets, - watermarkStrategy, - runtimeContext.getProcessingTimeService(), - runtimeContext.getExecutionConfig().getAutoWatermarkInterval(), - runtimeContext.getUserCodeClassLoader(), - runtimeContext.getTaskNameWithSubtasks(), - deserializer, - properties, - pollTimeout, - runtimeContext.getMetricGroup(), - consumerMetricGroup, - useMetrics, - typeSerializer, - producerParallelism); - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/shuffle/FlinkKafkaShuffleProducer.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/shuffle/FlinkKafkaShuffleProducer.java deleted file mode 100644 index 46754f270..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/shuffle/FlinkKafkaShuffleProducer.java +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * 
contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.shuffle; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.api.java.functions.KeySelector; -import org.apache.flink.core.memory.DataOutputSerializer; -import org.apache.flink.runtime.state.KeyGroupRangeAssignment; -import org.apache.flink.streaming.api.watermark.Watermark; -import org.apache.flink.streaming.connectors.kafka.FlinkKafkaException; -import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartitionAssigner; -import org.apache.flink.util.Preconditions; -import org.apache.flink.util.PropertiesUtil; - -import org.apache.kafka.clients.producer.ProducerRecord; - -import java.io.IOException; -import java.io.Serializable; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; - -import static org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle.PARTITION_NUMBER; - -/** - * Flink Kafka Shuffle Producer Function. It is different from {@link FlinkKafkaProducer} in the way - * handling elements and watermarks - */ -@Internal -@Deprecated -public class FlinkKafkaShuffleProducer extends FlinkKafkaProducer { - private final KafkaSerializer kafkaSerializer; - private final KeySelector keySelector; - private final int numberOfPartitions; - - private final Map subtaskToPartitionMap; - - FlinkKafkaShuffleProducer( - String defaultTopicId, - TypeSerializer typeSerializer, - Properties props, - KeySelector keySelector, - Semantic semantic, - int kafkaProducersPoolSize) { - super( - defaultTopicId, - (element, timestamp) -> null, - props, - semantic, - kafkaProducersPoolSize); - - this.kafkaSerializer = new KafkaSerializer<>(typeSerializer); - this.keySelector = keySelector; - - Preconditions.checkArgument( - props.getProperty(PARTITION_NUMBER) != null, - "Missing partition number for Kafka Shuffle"); - numberOfPartitions = PropertiesUtil.getInt(props, PARTITION_NUMBER, Integer.MIN_VALUE); - subtaskToPartitionMap = new HashMap<>(); - } - - /** - * This is the function invoked to handle each element. 
- * - * @param transaction Transaction state; elements are written to Kafka in transactions to - * guarantee different level of data consistency - * @param next Element to handle - * @param context Context needed to handle the element - * @throws FlinkKafkaException for kafka error - */ - @Override - public void invoke(KafkaTransactionState transaction, IN next, Context context) - throws FlinkKafkaException { - checkErroneous(); - - // write timestamp to Kafka if timestamp is available - Long timestamp = context.timestamp(); - - int[] partitions = getPartitions(transaction); - int partitionIndex; - try { - int subtaskIndex = - KeyGroupRangeAssignment.assignKeyToParallelOperator( - keySelector.getKey(next), partitions.length, partitions.length); - partitionIndex = subtaskToPartitionMap.get(subtaskIndex); - } catch (Exception e) { - throw new RuntimeException("Fail to assign a partition number to record", e); - } - - ProducerRecord record = - new ProducerRecord<>( - defaultTopicId, - partitionIndex, - timestamp, - null, - kafkaSerializer.serializeRecord(next, timestamp)); - - pendingRecords.incrementAndGet(); - transaction.getProducer().send(record, callback); - } - - /** - * This is the function invoked to handle each watermark. - * - * @param watermark Watermark to handle - * @throws FlinkKafkaException For kafka error - */ - public void invoke(Watermark watermark) throws FlinkKafkaException { - checkErroneous(); - KafkaTransactionState transaction = currentTransaction(); - - int[] partitions = getPartitions(transaction); - int subtask = getRuntimeContext().getIndexOfThisSubtask(); - - // broadcast watermark - long timestamp = watermark.getTimestamp(); - for (int partition : partitions) { - ProducerRecord record = - new ProducerRecord<>( - defaultTopicId, - partition, - timestamp, - null, - kafkaSerializer.serializeWatermark(watermark, subtask)); - - pendingRecords.incrementAndGet(); - transaction.getProducer().send(record, callback); - } - } - - private int[] getPartitions(KafkaTransactionState transaction) { - int[] partitions = topicPartitionsMap.get(defaultTopicId); - if (partitions == null) { - partitions = getPartitionsByTopic(defaultTopicId, transaction.getProducer()); - topicPartitionsMap.put(defaultTopicId, partitions); - for (int i = 0; i < partitions.length; i++) { - subtaskToPartitionMap.put( - KafkaTopicPartitionAssigner.assign( - defaultTopicId, partitions[i], partitions.length), - partitions[i]); - } - } - - Preconditions.checkArgument(partitions.length == numberOfPartitions); - - return partitions; - } - - /** Flink Kafka Shuffle Serializer. */ - public static final class KafkaSerializer implements Serializable { - public static final int TAG_REC_WITH_TIMESTAMP = 0; - public static final int TAG_REC_WITHOUT_TIMESTAMP = 1; - public static final int TAG_WATERMARK = 2; - - private static final long serialVersionUID = 2000002L; - // easy for updating SerDe format later - private static final int KAFKA_SHUFFLE_VERSION = 0; - - private final TypeSerializer serializer; - - private transient DataOutputSerializer dos; - - KafkaSerializer(TypeSerializer serializer) { - this.serializer = serializer; - } - - /** Format: Version(byte), TAG(byte), [timestamp(long)], record. 
*/ - byte[] serializeRecord(IN record, Long timestamp) { - if (dos == null) { - dos = new DataOutputSerializer(16); - } - - try { - dos.write(KAFKA_SHUFFLE_VERSION); - - if (timestamp == null) { - dos.write(TAG_REC_WITHOUT_TIMESTAMP); - } else { - dos.write(TAG_REC_WITH_TIMESTAMP); - dos.writeLong(timestamp); - } - serializer.serialize(record, dos); - - } catch (IOException e) { - throw new RuntimeException("Unable to serialize record", e); - } - - byte[] ret = dos.getCopyOfBuffer(); - dos.clear(); - return ret; - } - - /** Format: Version(byte), TAG(byte), subtask(int), timestamp(long). */ - byte[] serializeWatermark(Watermark watermark, int subtask) { - if (dos == null) { - dos = new DataOutputSerializer(16); - } - - try { - dos.write(KAFKA_SHUFFLE_VERSION); - dos.write(TAG_WATERMARK); - dos.writeInt(subtask); - dos.writeLong(watermark.getTimestamp()); - } catch (IOException e) { - throw new RuntimeException("Unable to serialize watermark", e); - } - - byte[] ret = dos.getCopyOfBuffer(); - dos.clear(); - return ret; - } - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/shuffle/StreamKafkaShuffleSink.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/shuffle/StreamKafkaShuffleSink.java deleted file mode 100644 index e24e15650..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/shuffle/StreamKafkaShuffleSink.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.shuffle; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.streaming.api.operators.StreamOperator; -import org.apache.flink.streaming.api.operators.StreamSink; -import org.apache.flink.streaming.api.watermark.Watermark; - -/** - * A customized {@link StreamOperator} for executing {@link FlinkKafkaShuffleProducer} that handle - * both elements and watermarks. If the shuffle sink is determined to be useful to other sinks in - * the future, we should abstract this operator to data stream api. For now, we keep the operator - * this way to avoid public interface change. 
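For reference, the framing written by KafkaSerializer above is: one version byte (KAFKA_SHUFFLE_VERSION, currently 0), one tag byte, and then either an optional event timestamp followed by the TypeSerializer-encoded element (for records) or the producing subtask index followed by the watermark timestamp (for watermarks). The snippet below is a minimal consumer-side sketch of reading that layout with Flink's DataInputDeserializer; the class name ShuffleFrameReader and the handling hooks are illustrative and not part of this change set.

import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.core.memory.DataInputDeserializer;

import java.io.IOException;

// Hypothetical consumer-side counterpart of KafkaSerializer; mirrors the tags written above.
final class ShuffleFrameReader<IN> {
    private static final int TAG_REC_WITH_TIMESTAMP = 0;
    private static final int TAG_REC_WITHOUT_TIMESTAMP = 1; // record without a timestamp field
    private static final int TAG_WATERMARK = 2;

    private final TypeSerializer<IN> serializer;

    ShuffleFrameReader(TypeSerializer<IN> serializer) {
        this.serializer = serializer;
    }

    void decode(byte[] kafkaValue) throws IOException {
        DataInputDeserializer in = new DataInputDeserializer(kafkaValue);
        byte version = in.readByte(); // KAFKA_SHUFFLE_VERSION, currently always 0
        byte tag = in.readByte();
        if (tag == TAG_WATERMARK) {
            int producingSubtask = in.readInt();    // subtask index of the emitting producer
            long watermarkTimestamp = in.readLong();
            // ... combine the watermark per producing subtask
        } else {
            Long timestamp = (tag == TAG_REC_WITH_TIMESTAMP) ? in.readLong() : null;
            IN record = serializer.deserialize(in); // element payload written by the TypeSerializer
            // ... emit the record, optionally with its timestamp
        }
    }
}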
- */ -@Internal -@Deprecated -class StreamKafkaShuffleSink extends StreamSink { - - public StreamKafkaShuffleSink(FlinkKafkaShuffleProducer flinkKafkaShuffleProducer) { - super(flinkKafkaShuffleProducer); - } - - @Override - public void processWatermark(Watermark mark) throws Exception { - super.processWatermark(mark); - ((FlinkKafkaShuffleProducer) userFunction).invoke(mark); - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaConnectorOptionsUtil.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaConnectorOptionsUtil.java index 5960a709a..775af4f1c 100644 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaConnectorOptionsUtil.java +++ b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaConnectorOptionsUtil.java @@ -27,7 +27,6 @@ import org.apache.flink.connector.kafka.sink.KafkaPartitioner; import org.apache.flink.streaming.connectors.kafka.config.BoundedMode; import org.apache.flink.streaming.connectors.kafka.config.StartupMode; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkFixedPartitioner; import org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.ScanBoundedMode; import org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.ScanStartupMode; @@ -45,6 +44,8 @@ import org.apache.flink.util.InstantiationUtil; import org.apache.flink.util.Preconditions; +import org.apache.kafka.common.TopicPartition; + import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -250,7 +251,7 @@ private static boolean isSingleTopic(ReadableConfig tableOptions) { } public static StartupOptions getStartupOptions(ReadableConfig tableOptions) { - final Map specificOffsets = new HashMap<>(); + final Map specificOffsets = new HashMap<>(); final StartupMode startupMode = tableOptions .getOptional(SCAN_STARTUP_MODE) @@ -273,7 +274,7 @@ public static StartupOptions getStartupOptions(ReadableConfig tableOptions) { } public static BoundedOptions getBoundedOptions(ReadableConfig tableOptions) { - final Map specificOffsets = new HashMap<>(); + final Map specificOffsets = new HashMap<>(); final BoundedMode boundedMode = KafkaConnectorOptionsUtil.fromOption(tableOptions.get(SCAN_BOUNDED_MODE)); if (boundedMode == BoundedMode.SPECIFIC_OFFSETS) { @@ -290,32 +291,26 @@ public static BoundedOptions getBoundedOptions(ReadableConfig tableOptions) { } private static void buildSpecificOffsets( - ReadableConfig tableOptions, - String topic, - Map specificOffsets) { + ReadableConfig tableOptions, String topic, Map specificOffsets) { String specificOffsetsStrOpt = tableOptions.get(SCAN_STARTUP_SPECIFIC_OFFSETS); final Map offsetMap = parseSpecificOffsets(specificOffsetsStrOpt, SCAN_STARTUP_SPECIFIC_OFFSETS.key()); offsetMap.forEach( (partition, offset) -> { - final KafkaTopicPartition topicPartition = - new KafkaTopicPartition(topic, partition); + final TopicPartition topicPartition = new TopicPartition(topic, partition); specificOffsets.put(topicPartition, offset); }); } public static void buildBoundedOffsets( - ReadableConfig tableOptions, - String topic, - Map specificOffsets) { + ReadableConfig tableOptions, String topic, Map specificOffsets) { String specificOffsetsEndOpt = tableOptions.get(SCAN_BOUNDED_SPECIFIC_OFFSETS); final Map offsetMap = 
parseSpecificOffsets(specificOffsetsEndOpt, SCAN_BOUNDED_SPECIFIC_OFFSETS.key()); offsetMap.forEach( (partition, offset) -> { - final KafkaTopicPartition topicPartition = - new KafkaTopicPartition(topic, partition); + final TopicPartition topicPartition = new TopicPartition(topic, partition); specificOffsets.put(topicPartition, offset); }); } @@ -668,14 +663,14 @@ static void validateDeliveryGuarantee(ReadableConfig tableOptions) { /** Kafka startup options. * */ public static class StartupOptions { public StartupMode startupMode; - public Map specificOffsets; + public Map specificOffsets; public long startupTimestampMillis; } /** Kafka bounded options. * */ public static class BoundedOptions { public BoundedMode boundedMode; - public Map specificOffsets; + public Map specificOffsets; public long boundedTimestampMillis; } diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicSource.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicSource.java index c963da762..6c2a7f93c 100644 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicSource.java +++ b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicSource.java @@ -34,7 +34,6 @@ import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; import org.apache.flink.streaming.connectors.kafka.config.BoundedMode; import org.apache.flink.streaming.connectors.kafka.config.StartupMode; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; import org.apache.flink.streaming.connectors.kafka.table.DynamicKafkaDeserializationSchema.MetadataConverter; import org.apache.flink.table.api.DataTypes; import org.apache.flink.table.connector.ChangelogMode; @@ -143,7 +142,7 @@ public class KafkaDynamicSource * Specific startup offsets; only relevant when startup mode is {@link * StartupMode#SPECIFIC_OFFSETS}. */ - protected final Map specificStartupOffsets; + protected final Map specificStartupOffsets; /** * The start timestamp to locate partition offsets; only relevant when startup mode is {@link @@ -158,7 +157,7 @@ public class KafkaDynamicSource * Specific end offsets; only relevant when bounded mode is {@link * BoundedMode#SPECIFIC_OFFSETS}. 
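For context on the retyping in the hunks above: StartupOptions and BoundedOptions now carry offsets keyed by Kafka's own org.apache.kafka.common.TopicPartition rather than the connector's KafkaTopicPartition, so the maps can be handed to the KafkaSource builder without conversion. A small sketch with a made-up topic name and offsets:

import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;

import org.apache.kafka.common.TopicPartition;

import java.util.HashMap;
import java.util.Map;

// Sketch: the TopicPartition-keyed offset map that StartupOptions/BoundedOptions now hold.
public class SpecificOffsetsExample {
    public static void main(String[] args) {
        Map<TopicPartition, Long> specificOffsets = new HashMap<>();
        specificOffsets.put(new TopicPartition("example-topic", 0), 42L);
        specificOffsets.put(new TopicPartition("example-topic", 1), 300L);

        // KafkaDynamicSource passes such a map straight through, as in the hunks above:
        //   kafkaSourceBuilder.setStartingOffsets(OffsetsInitializer.offsets(specificOffsets));
        //   kafkaSourceBuilder.setBounded(OffsetsInitializer.offsets(specificOffsets));
        OffsetsInitializer startingOffsets = OffsetsInitializer.offsets(specificOffsets);
        System.out.println(startingOffsets);
    }
}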
*/ - protected final Map specificBoundedOffsets; + protected final Map specificBoundedOffsets; /** * The bounded timestamp to locate partition offsets; only relevant when bounded mode is {@link @@ -182,10 +181,10 @@ public KafkaDynamicSource( @Nullable Pattern topicPattern, Properties properties, StartupMode startupMode, - Map specificStartupOffsets, + Map specificStartupOffsets, long startupTimestampMillis, BoundedMode boundedMode, - Map specificBoundedOffsets, + Map specificBoundedOffsets, long boundedTimestampMillis, boolean upsertMode, String tableIdentifier) { @@ -452,8 +451,7 @@ protected KafkaSource createKafkaSource( specificStartupOffsets.forEach( (tp, offset) -> offsets.put( - new TopicPartition(tp.getTopic(), tp.getPartition()), - offset)); + new TopicPartition(tp.topic(), tp.partition()), offset)); kafkaSourceBuilder.setStartingOffsets(OffsetsInitializer.offsets(offsets)); break; case TIMESTAMP: @@ -477,8 +475,7 @@ protected KafkaSource createKafkaSource( specificBoundedOffsets.forEach( (tp, offset) -> offsets.put( - new TopicPartition(tp.getTopic(), tp.getPartition()), - offset)); + new TopicPartition(tp.topic(), tp.partition()), offset)); kafkaSourceBuilder.setBounded(OffsetsInitializer.offsets(offsets)); break; case TIMESTAMP: diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactory.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactory.java index 8124691a5..15958f17e 100644 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactory.java +++ b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactory.java @@ -30,7 +30,6 @@ import org.apache.flink.connector.kafka.source.KafkaSourceOptions; import org.apache.flink.streaming.connectors.kafka.config.BoundedMode; import org.apache.flink.streaming.connectors.kafka.config.StartupMode; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; import org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptionsUtil.BoundedOptions; import org.apache.flink.table.api.ValidationException; import org.apache.flink.table.catalog.ObjectIdentifier; @@ -49,6 +48,7 @@ import org.apache.flink.table.types.DataType; import org.apache.flink.types.RowKind; +import org.apache.kafka.common.TopicPartition; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -391,10 +391,10 @@ protected KafkaDynamicSource createKafkaTableSource( @Nullable Pattern topicPattern, Properties properties, StartupMode startupMode, - Map specificStartupOffsets, + Map specificStartupOffsets, long startupTimestampMillis, BoundedMode boundedMode, - Map specificEndOffsets, + Map specificEndOffsets, long endTimestampMillis, String tableIdentifier) { return new KafkaDynamicSource( diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/util/serialization/KeyedDeserializationSchema.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/util/serialization/KeyedDeserializationSchema.java deleted file mode 100644 index d3150b98e..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/util/serialization/KeyedDeserializationSchema.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.util.serialization; - -import org.apache.flink.annotation.PublicEvolving; -import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; - -import org.apache.kafka.clients.consumer.ConsumerRecord; - -import java.io.IOException; - -/** - * The deserialization schema describes how to turn the byte key / value messages delivered by - * certain data sources (for example Apache Kafka) into data types (Java/Scala objects) that are - * processed by Flink. - * - * @param The type created by the keyed deserialization schema. - * @deprecated Use {@link KafkaDeserializationSchema}. - */ -@Deprecated -@PublicEvolving -public interface KeyedDeserializationSchema extends KafkaDeserializationSchema { - /** - * Deserializes the byte message. - * - * @param messageKey the key as a byte array (null if no key has been set). - * @param message The message, as a byte array (null if the message was empty or deleted). - * @param partition The partition the message has originated from. - * @param offset the offset of the message in the original source (for example the Kafka - * offset). - * @return The deserialized message as an object (null if the message cannot be deserialized). - */ - T deserialize(byte[] messageKey, byte[] message, String topic, int partition, long offset) - throws IOException; - - @Override - default T deserialize(ConsumerRecord record) throws IOException { - return deserialize( - record.key(), record.value(), record.topic(), record.partition(), record.offset()); - } -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/util/serialization/KeyedSerializationSchema.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/util/serialization/KeyedSerializationSchema.java deleted file mode 100644 index b777419fe..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/util/serialization/KeyedSerializationSchema.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
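The KeyedDeserializationSchema interface removed above already delegated to KafkaDeserializationSchema, which its Javadoc names as the replacement. A minimal sketch of an equivalent schema written directly against KafkaDeserializationSchema; the class name and the key/value handling are illustrative only:

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema;

import org.apache.kafka.clients.consumer.ConsumerRecord;

import java.nio.charset.StandardCharsets;

// Hypothetical migration target for a keyed schema that exposes both key and value.
public class KeyValueStringDeserializationSchema implements KafkaDeserializationSchema<String> {

    @Override
    public String deserialize(ConsumerRecord<byte[], byte[]> record) {
        String key = record.key() == null ? null : new String(record.key(), StandardCharsets.UTF_8);
        String value = record.value() == null ? null : new String(record.value(), StandardCharsets.UTF_8);
        return key + "=" + value;
    }

    @Override
    public boolean isEndOfStream(String nextElement) {
        return false; // this schema never signals end-of-stream
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return Types.STRING;
    }
}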
- */ - -package org.apache.flink.streaming.util.serialization; - -import org.apache.flink.annotation.PublicEvolving; -import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema; - -import java.io.Serializable; - -/** - * The serialization schema describes how to turn a data object into a different serialized - * representation. Most data sinks (for example Apache Kafka) require the data to be handed to them - * in a specific format (for example as byte strings). - * - * @param The type to be serialized. - * @deprecated Use {@link KafkaSerializationSchema}. - */ -@Deprecated -@PublicEvolving -public interface KeyedSerializationSchema extends Serializable { - - /** - * Serializes the key of the incoming element to a byte array This method might return null if - * no key is available. - * - * @param element The incoming element to be serialized - * @return the key of the element as a byte array - */ - byte[] serializeKey(T element); - - /** - * Serializes the value of the incoming element to a byte array. - * - * @param element The incoming element to be serialized - * @return the value of the element as a byte array - */ - byte[] serializeValue(T element); - - /** - * Optional method to determine the target topic for the element. - * - * @param element Incoming element to determine the target topic from - * @return null or the target topic - */ - String getTargetTopic(T element); -} diff --git a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/util/serialization/TypeInformationKeyValueSerializationSchema.java b/flink-connector-kafka/src/main/java/org/apache/flink/streaming/util/serialization/TypeInformationKeyValueSerializationSchema.java deleted file mode 100644 index 05e0eaea1..000000000 --- a/flink-connector-kafka/src/main/java/org/apache/flink/streaming/util/serialization/TypeInformationKeyValueSerializationSchema.java +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
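KeyedSerializationSchema, removed above, is likewise superseded: its Javadoc points to KafkaSerializationSchema, and with the KafkaSink stack the usual replacement is the KafkaRecordSerializationSchema builder. A hedged sketch with an assumed topic name; the old serializeKey method would be covered by additionally setting a key serialization schema on the builder:

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;

// Sketch of the builder-based replacement; "example-topic" is illustrative.
public class RecordSchemaExample {
    public static KafkaRecordSerializationSchema<String> schema() {
        return KafkaRecordSerializationSchema.builder()
                .setTopic("example-topic")
                .setValueSerializationSchema(new SimpleStringSchema())
                .build();
    }
}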
- */ - -package org.apache.flink.streaming.util.serialization; - -import org.apache.flink.annotation.PublicEvolving; -import org.apache.flink.api.common.ExecutionConfig; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.api.java.tuple.Tuple2; -import org.apache.flink.api.java.typeutils.TupleTypeInfo; -import org.apache.flink.api.java.typeutils.TypeExtractor; -import org.apache.flink.core.memory.DataInputDeserializer; -import org.apache.flink.core.memory.DataOutputSerializer; -import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; - -import org.apache.kafka.clients.consumer.ConsumerRecord; - -import java.io.IOException; - -/** - * A serialization and deserialization schema for Key Value Pairs that uses Flink's serialization - * stack to transform typed from and to byte arrays. - * - * @param The key type to be serialized. - * @param The value type to be serialized. - */ -@PublicEvolving -@Deprecated -public class TypeInformationKeyValueSerializationSchema - implements KafkaDeserializationSchema>, - KeyedSerializationSchema> { - - private static final long serialVersionUID = -5359448468131559102L; - - /** The serializer for the key. */ - private final TypeSerializer keySerializer; - - /** The serializer for the value. */ - private final TypeSerializer valueSerializer; - - /** reusable input deserialization buffer. */ - private final DataInputDeserializer inputDeserializer; - - /** reusable output serialization buffer for the key. */ - private transient DataOutputSerializer keyOutputSerializer; - - /** reusable output serialization buffer for the value. */ - private transient DataOutputSerializer valueOutputSerializer; - - /** - * The type information, to be returned by {@link #getProducedType()}. It is transient, because - * it is not serializable. Note that this means that the type information is not available at - * runtime, but only prior to the first serialization / deserialization - */ - private final transient TypeInformation> typeInfo; - - // ------------------------------------------------------------------------ - - /** - * Creates a new de-/serialization schema for the given types. - * - * @param keyTypeInfo The type information for the key type de-/serialized by this schema. - * @param valueTypeInfo The type information for the value type de-/serialized by this schema. - * @param ec The execution config, which is used to parametrize the type serializers. - */ - public TypeInformationKeyValueSerializationSchema( - TypeInformation keyTypeInfo, TypeInformation valueTypeInfo, ExecutionConfig ec) { - this.typeInfo = new TupleTypeInfo<>(keyTypeInfo, valueTypeInfo); - this.keySerializer = keyTypeInfo.createSerializer(ec); - this.valueSerializer = valueTypeInfo.createSerializer(ec); - this.inputDeserializer = new DataInputDeserializer(); - } - - /** - * Creates a new de-/serialization schema for the given types. This constructor accepts the - * types as classes and internally constructs the type information from the classes. - * - *

    If the types are parametrized and cannot be fully defined via classes, use the constructor - * that accepts {@link TypeInformation} instead. - * - * @param keyClass The class of the key de-/serialized by this schema. - * @param valueClass The class of the value de-/serialized by this schema. - * @param config The execution config, which is used to parametrize the type serializers. - */ - public TypeInformationKeyValueSerializationSchema( - Class keyClass, Class valueClass, ExecutionConfig config) { - this( - TypeExtractor.createTypeInfo(keyClass), - TypeExtractor.createTypeInfo(valueClass), - config); - } - - // ------------------------------------------------------------------------ - - @Override - public Tuple2 deserialize(ConsumerRecord record) throws Exception { - K key = null; - V value = null; - - if (record.key() != null) { - inputDeserializer.setBuffer(record.key()); - key = keySerializer.deserialize(inputDeserializer); - } - if (record.value() != null) { - inputDeserializer.setBuffer(record.value()); - value = valueSerializer.deserialize(inputDeserializer); - } - return new Tuple2<>(key, value); - } - - /** - * This schema never considers an element to signal end-of-stream, so this method returns always - * false. - * - * @param nextElement The element to test for the end-of-stream signal. - * @return Returns false. - */ - @Override - public boolean isEndOfStream(Tuple2 nextElement) { - return false; - } - - @Override - public byte[] serializeKey(Tuple2 element) { - if (element.f0 == null) { - return null; - } else { - // key is not null. serialize it: - if (keyOutputSerializer == null) { - keyOutputSerializer = new DataOutputSerializer(16); - } - try { - keySerializer.serialize(element.f0, keyOutputSerializer); - } catch (IOException e) { - throw new RuntimeException("Unable to serialize record", e); - } - // check if key byte array size changed - byte[] res = keyOutputSerializer.getByteArray(); - if (res.length != keyOutputSerializer.length()) { - byte[] n = new byte[keyOutputSerializer.length()]; - System.arraycopy(res, 0, n, 0, keyOutputSerializer.length()); - res = n; - } - keyOutputSerializer.clear(); - return res; - } - } - - @Override - public byte[] serializeValue(Tuple2 element) { - // if the value is null, its serialized value is null as well. 
- if (element.f1 == null) { - return null; - } - - if (valueOutputSerializer == null) { - valueOutputSerializer = new DataOutputSerializer(16); - } - - try { - valueSerializer.serialize(element.f1, valueOutputSerializer); - } catch (IOException e) { - throw new RuntimeException("Unable to serialize record", e); - } - - byte[] res = valueOutputSerializer.getByteArray(); - if (res.length != valueOutputSerializer.length()) { - byte[] n = new byte[valueOutputSerializer.length()]; - System.arraycopy(res, 0, n, 0, valueOutputSerializer.length()); - res = n; - } - valueOutputSerializer.clear(); - return res; - } - - @Override - public String getTargetTopic(Tuple2 element) { - return null; // we are never overriding the topic - } - - @Override - public TypeInformation> getProducedType() { - if (typeInfo != null) { - return typeInfo; - } else { - throw new IllegalStateException( - "The type information is not available after this class has been serialized and distributed."); - } - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/sink/KafkaRecordSerializationSchemaBuilderTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/sink/KafkaRecordSerializationSchemaBuilderTest.java index 4d1437288..75feb292b 100644 --- a/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/sink/KafkaRecordSerializationSchemaBuilderTest.java +++ b/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/sink/KafkaRecordSerializationSchemaBuilderTest.java @@ -29,7 +29,6 @@ import org.apache.flink.connector.kafka.lineage.TypeDatasetFacet; import org.apache.flink.connector.kafka.lineage.TypeDatasetFacetProvider; import org.apache.flink.connector.testutils.formats.DummyInitializationContext; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; import org.apache.flink.util.TestLogger; import org.apache.kafka.clients.producer.ProducerRecord; @@ -150,8 +149,7 @@ public void testSerializeRecordWithTopicSelector() { public void testSerializeRecordWithPartitioner() throws Exception { AtomicBoolean opened = new AtomicBoolean(false); final int partition = 5; - final FlinkKafkaPartitioner partitioner = - new ConstantPartitioner<>(opened, partition); + final KafkaPartitioner partitioner = new ConstantPartitioner<>(opened, partition); final KafkaRecordSerializationSchema schema = KafkaRecordSerializationSchema.builder() .setTopic(DEFAULT_TOPIC) @@ -496,7 +494,7 @@ public int[] getPartitionsForTopic(String topic) { } } - private static class ConstantPartitioner extends FlinkKafkaPartitioner { + private static class ConstantPartitioner implements KafkaPartitioner { private final AtomicBoolean opened; private final int partition; diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceLegacyITCase.java b/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceLegacyITCase.java deleted file mode 100644 index e9c9cab81..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceLegacyITCase.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
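The KafkaRecordSerializationSchemaBuilderTest hunk above swaps FlinkKafkaPartitioner for the new org.apache.flink.connector.kafka.sink.KafkaPartitioner while leaving the test's ConstantPartitioner body untouched, which suggests the familiar open/partition methods carry over. A hedged sketch of a custom partitioner against the new interface, assuming those signatures; FixedPartitioner is an illustrative name:

import org.apache.flink.connector.kafka.sink.KafkaPartitioner;

// Sketch: a partitioner that always writes to one Kafka partition, mirroring the test's ConstantPartitioner.
public class FixedPartitioner<T> implements KafkaPartitioner<T> {

    private final int targetPartition;

    public FixedPartitioner(int targetPartition) {
        this.targetPartition = targetPartition;
    }

    @Override
    public void open(int parallelInstanceId, int parallelInstances) {
        // no per-subtask setup needed here; the test's ConstantPartitioner only flips a flag
    }

    @Override
    public int partition(T record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
        return targetPartition;
    }
}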
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.connector.kafka.source; - -import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer; -import org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase; -import org.apache.flink.streaming.connectors.kafka.KafkaProducerTestBase; -import org.apache.flink.streaming.connectors.kafka.KafkaTestEnvironmentImpl; - -import org.junit.After; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; - -import static org.apache.flink.connector.kafka.testutils.KafkaUtil.checkProducerLeak; - -/** - * An IT case class that runs all the IT cases of the legacy {@link - * org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer} with the new {@link KafkaSource}. - */ -public class KafkaSourceLegacyITCase extends KafkaConsumerTestBase { - - public KafkaSourceLegacyITCase() throws Exception { - super(true); - } - - @BeforeClass - public static void prepare() throws Exception { - KafkaProducerTestBase.prepare(); - ((KafkaTestEnvironmentImpl) kafkaServer) - .setProducerSemantic(FlinkKafkaProducer.Semantic.AT_LEAST_ONCE); - } - - @After - public void check() { - checkProducerLeak(); - } - - @Test - public void testFailOnNoBroker() throws Exception { - runFailOnNoBrokerTest(); - } - - @Test - public void testConcurrentProducerConsumerTopology() throws Exception { - runSimpleConcurrentProducerConsumerTopology(); - } - - @Test - public void testKeyValueSupport() throws Exception { - runKeyValueTest(); - } - - // --- canceling / failures --- - - @Test - public void testCancelingEmptyTopic() throws Exception { - runCancelingOnEmptyInputTest(); - } - - @Test - public void testCancelingFullTopic() throws Exception { - runCancelingOnFullInputTest(); - } - - // --- source to partition mappings and exactly once --- - - @Test - public void testOneToOneSources() throws Exception { - runOneToOneExactlyOnceTest(); - } - - @Test - public void testOneSourceMultiplePartitions() throws Exception { - runOneSourceMultiplePartitionsExactlyOnceTest(); - } - - @Test - public void testMultipleSourcesOnePartition() throws Exception { - runMultipleSourcesOnePartitionExactlyOnceTest(); - } - - // --- broker failure --- - - @Test - @Ignore("FLINK-28267") - public void testBrokerFailure() throws Exception { - runBrokerFailureTest(); - } - - // --- special executions --- - - @Test - public void testBigRecordJob() throws Exception { - runBigRecordTestTopology(); - } - - @Test - public void testMultipleTopicsWithLegacySerializer() throws Exception { - runProduceConsumeMultipleTopics(true); - } - - @Test - public void testMultipleTopicsWithKafkaSerializer() throws Exception { - runProduceConsumeMultipleTopics(false); - } - - @Test - public void testAllDeletes() throws Exception { - runAllDeletesTest(); - } - - // --- startup mode --- - - @Test - public void testStartFromEarliestOffsets() throws Exception { - runStartFromEarliestOffsets(); - } - - @Test - public void 
testStartFromLatestOffsets() throws Exception { - runStartFromLatestOffsets(); - } - - @Test - public void testStartFromGroupOffsets() throws Exception { - runStartFromGroupOffsets(); - } - - @Test - public void testStartFromSpecificOffsets() throws Exception { - runStartFromSpecificOffsets(); - } - - @Test - public void testStartFromTimestamp() throws Exception { - runStartFromTimestamp(); - } - - // --- offset committing --- - - @Test - public void testCommitOffsetsToKafka() throws Exception { - runCommitOffsetsToKafka(); - } - - @Test - public void testAutoOffsetRetrievalAndCommitToKafka() throws Exception { - runAutoOffsetRetrievalAndCommitToKafka(); - } - - @Test - public void testCollectingSchema() throws Exception { - runCollectingSchemaTest(); - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumerBaseMigrationTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumerBaseMigrationTest.java deleted file mode 100644 index 47bce8bd9..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumerBaseMigrationTest.java +++ /dev/null @@ -1,436 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.FlinkVersion; -import org.apache.flink.api.common.eventtime.WatermarkStrategy; -import org.apache.flink.core.testutils.OneShotLatch; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.runtime.checkpoint.OperatorSubtaskState; -import org.apache.flink.streaming.api.TimeCharacteristic; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.api.operators.StreamSource; -import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; -import org.apache.flink.streaming.api.watermark.Watermark; -import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; -import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; -import org.apache.flink.streaming.connectors.kafka.internals.AbstractPartitionDiscoverer; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartitionStateSentinel; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicsDescriptor; -import org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness; -import org.apache.flink.streaming.util.OperatorSnapshotUtil; -import org.apache.flink.util.SerializedValue; - -import org.junit.Ignore; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.mockito.invocation.InvocationOnMock; -import org.mockito.stubbing.Answer; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.Matchers.any; -import static org.mockito.Mockito.mock; -import static org.powermock.api.mockito.PowerMockito.doAnswer; -import static org.powermock.api.mockito.PowerMockito.when; - -/** - * Tests for checking whether {@link FlinkKafkaConsumerBase} can restore from snapshots that were - * done using previous Flink versions' {@link FlinkKafkaConsumerBase}. - * - *

    For regenerating the binary snapshot files run {@link #writeSnapshot()} on the corresponding - * Flink release-* branch. - */ -@RunWith(Parameterized.class) -public class FlinkKafkaConsumerBaseMigrationTest { - - /** - * TODO change this to the corresponding savepoint version to be written (e.g. {@link - * FlinkVersion#v1_3} for 1.3) TODO and remove all @Ignore annotations on write*Snapshot() - * methods to generate savepoints TODO Note: You should generate the savepoint based on the - * release branch instead of the master. - */ - private final FlinkVersion flinkGenerateSavepointVersion = null; - - private static final HashMap PARTITION_STATE = new HashMap<>(); - - static { - PARTITION_STATE.put(new KafkaTopicPartition("abc", 13), 16768L); - PARTITION_STATE.put(new KafkaTopicPartition("def", 7), 987654321L); - } - - private static final List TOPICS = - new ArrayList<>(PARTITION_STATE.keySet()) - .stream().map(p -> p.getTopic()).distinct().collect(Collectors.toList()); - - private final FlinkVersion testMigrateVersion; - - @Parameterized.Parameters(name = "Migration Savepoint: {0}") - public static Collection parameters() { - return FlinkVersion.rangeOf(FlinkVersion.v1_8, FlinkVersion.current()); - } - - public FlinkKafkaConsumerBaseMigrationTest(FlinkVersion testMigrateVersion) { - this.testMigrateVersion = testMigrateVersion; - } - - /** Manually run this to write binary snapshot data. */ - @Ignore - @Test - public void writeSnapshot() throws Exception { - writeSnapshot( - "src/test/resources/kafka-consumer-migration-test-flink" - + flinkGenerateSavepointVersion - + "-snapshot", - PARTITION_STATE); - - final HashMap emptyState = new HashMap<>(); - writeSnapshot( - "src/test/resources/kafka-consumer-migration-test-flink" - + flinkGenerateSavepointVersion - + "-empty-state-snapshot", - emptyState); - } - - private void writeSnapshot(String path, HashMap state) - throws Exception { - - final OneShotLatch latch = new OneShotLatch(); - final AbstractFetcher fetcher = mock(AbstractFetcher.class); - - doAnswer( - new Answer() { - @Override - public Void answer(InvocationOnMock invocation) throws Throwable { - latch.trigger(); - return null; - } - }) - .when(fetcher) - .runFetchLoop(); - - when(fetcher.snapshotCurrentState()).thenReturn(state); - - final List partitions = new ArrayList<>(PARTITION_STATE.keySet()); - - final DummyFlinkKafkaConsumer consumerFunction = - new DummyFlinkKafkaConsumer<>( - fetcher, - TOPICS, - partitions, - FlinkKafkaConsumerBase.PARTITION_DISCOVERY_DISABLED); - - StreamSource> consumerOperator = - new StreamSource<>(consumerFunction); - - final AbstractStreamOperatorTestHarness testHarness = - new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0); - - testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime); - - testHarness.setup(); - testHarness.open(); - - final Throwable[] error = new Throwable[1]; - - // run the source asynchronously - Thread runner = - new Thread() { - @Override - public void run() { - try { - consumerFunction.run( - new DummySourceContext() { - @Override - public void collect(String element) {} - }); - } catch (Throwable t) { - t.printStackTrace(); - error[0] = t; - } - } - }; - runner.start(); - - if (!latch.isTriggered()) { - latch.await(); - } - - final OperatorSubtaskState snapshot; - synchronized (testHarness.getCheckpointLock()) { - snapshot = testHarness.snapshot(0L, 0L); - } - - OperatorSnapshotUtil.writeStateHandle(snapshot, path); - - consumerOperator.close(); - runner.join(); - } - - /** Test restoring 
from an legacy empty state, when no partitions could be found for topics. */ - @Test - public void testRestoreFromEmptyStateNoPartitions() throws Exception { - final DummyFlinkKafkaConsumer consumerFunction = - new DummyFlinkKafkaConsumer<>( - Collections.singletonList("dummy-topic"), - Collections.emptyList(), - FlinkKafkaConsumerBase.PARTITION_DISCOVERY_DISABLED); - - StreamSource> consumerOperator = - new StreamSource<>(consumerFunction); - - final AbstractStreamOperatorTestHarness testHarness = - new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0); - - testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime); - - testHarness.setup(); - - // restore state from binary snapshot file - testHarness.initializeState( - OperatorSnapshotUtil.getResourceFilename( - "kafka-consumer-migration-test-flink" - + testMigrateVersion - + "-empty-state-snapshot")); - - testHarness.open(); - - // assert that no partitions were found and is empty - assertThat(consumerFunction.getSubscribedPartitionsToStartOffsets()).isEmpty(); - - // assert that no state was restored - assertThat(consumerFunction.getRestoredState()).isEmpty(); - - consumerOperator.close(); - consumerOperator.cancel(); - } - - /** - * Test restoring from an empty state taken using a previous Flink version, when some partitions - * could be found for topics. - */ - @Test - public void testRestoreFromEmptyStateWithPartitions() throws Exception { - final List partitions = new ArrayList<>(PARTITION_STATE.keySet()); - - final DummyFlinkKafkaConsumer consumerFunction = - new DummyFlinkKafkaConsumer<>( - TOPICS, partitions, FlinkKafkaConsumerBase.PARTITION_DISCOVERY_DISABLED); - - StreamSource> consumerOperator = - new StreamSource<>(consumerFunction); - - final AbstractStreamOperatorTestHarness testHarness = - new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0); - - testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime); - - testHarness.setup(); - - // restore state from binary snapshot file - testHarness.initializeState( - OperatorSnapshotUtil.getResourceFilename( - "kafka-consumer-migration-test-flink" - + testMigrateVersion - + "-empty-state-snapshot")); - - testHarness.open(); - - // the expected state in "kafka-consumer-migration-test-flink1.x-snapshot-empty-state"; - // all new partitions after the snapshot are considered as partitions that were created - // while the - // consumer wasn't running, and should start from the earliest offset. 
- final HashMap expectedSubscribedPartitionsWithStartOffsets = - new HashMap<>(); - for (KafkaTopicPartition partition : PARTITION_STATE.keySet()) { - expectedSubscribedPartitionsWithStartOffsets.put( - partition, KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET); - } - - // assert that there are partitions and is identical to expected list - assertThat(consumerFunction.getSubscribedPartitionsToStartOffsets()) - .isNotEmpty() - .isEqualTo(expectedSubscribedPartitionsWithStartOffsets); - - // the new partitions should have been considered as restored state - assertThat(consumerFunction.getRestoredState()).isNotNull(); - assertThat(consumerFunction.getSubscribedPartitionsToStartOffsets()).isNotEmpty(); - for (Map.Entry expectedEntry : - expectedSubscribedPartitionsWithStartOffsets.entrySet()) { - assertThat(consumerFunction.getRestoredState()) - .containsEntry(expectedEntry.getKey(), expectedEntry.getValue()); - } - - consumerOperator.close(); - consumerOperator.cancel(); - } - - /** - * Test restoring from a non-empty state taken using a previous Flink version, when some - * partitions could be found for topics. - */ - @Test - public void testRestore() throws Exception { - final List partitions = new ArrayList<>(PARTITION_STATE.keySet()); - - final DummyFlinkKafkaConsumer consumerFunction = - new DummyFlinkKafkaConsumer<>( - TOPICS, partitions, FlinkKafkaConsumerBase.PARTITION_DISCOVERY_DISABLED); - - StreamSource> consumerOperator = - new StreamSource<>(consumerFunction); - - final AbstractStreamOperatorTestHarness testHarness = - new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0); - - testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime); - - testHarness.setup(); - - // restore state from binary snapshot file - testHarness.initializeState( - OperatorSnapshotUtil.getResourceFilename( - "kafka-consumer-migration-test-flink" + testMigrateVersion + "-snapshot")); - - testHarness.open(); - - // assert that there are partitions and is identical to expected list - assertThat(consumerFunction.getSubscribedPartitionsToStartOffsets()) - .isNotEmpty() - // on restore, subscribedPartitionsToStartOffsets should be identical to the - // restored state - .isEqualTo(PARTITION_STATE); - - // assert that state is correctly restored from legacy checkpoint - assertThat(consumerFunction.getRestoredState()).isNotNull().isEqualTo(PARTITION_STATE); - - consumerOperator.close(); - consumerOperator.cancel(); - } - - // ------------------------------------------------------------------------ - - private static class DummyFlinkKafkaConsumer extends FlinkKafkaConsumerBase { - private static final long serialVersionUID = 1L; - - private final List partitions; - - private final AbstractFetcher fetcher; - - @SuppressWarnings("unchecked") - DummyFlinkKafkaConsumer( - AbstractFetcher fetcher, - List topics, - List partitions, - long discoveryInterval) { - - super( - topics, - null, - (KafkaDeserializationSchema) mock(KafkaDeserializationSchema.class), - discoveryInterval, - false); - - this.fetcher = fetcher; - this.partitions = partitions; - } - - DummyFlinkKafkaConsumer( - List topics, List partitions, long discoveryInterval) { - this(mock(AbstractFetcher.class), topics, partitions, discoveryInterval); - } - - @Override - protected AbstractFetcher createFetcher( - SourceContext sourceContext, - Map thisSubtaskPartitionsWithStartOffsets, - SerializedValue> watermarkStrategy, - StreamingRuntimeContext runtimeContext, - OffsetCommitMode offsetCommitMode, - MetricGroup consumerMetricGroup, - 
boolean useMetrics) - throws Exception { - return fetcher; - } - - @Override - protected AbstractPartitionDiscoverer createPartitionDiscoverer( - KafkaTopicsDescriptor topicsDescriptor, - int indexOfThisSubtask, - int numParallelSubtasks) { - - AbstractPartitionDiscoverer mockPartitionDiscoverer = - mock(AbstractPartitionDiscoverer.class); - - try { - when(mockPartitionDiscoverer.discoverPartitions()).thenReturn(partitions); - } catch (Exception e) { - // ignore - } - when(mockPartitionDiscoverer.setAndCheckDiscoveredPartition( - any(KafkaTopicPartition.class))) - .thenReturn(true); - - return mockPartitionDiscoverer; - } - - @Override - protected boolean getIsAutoCommitEnabled() { - return false; - } - - @Override - protected Map fetchOffsetsWithTimestamp( - Collection partitions, long timestamp) { - throw new UnsupportedOperationException(); - } - } - - private abstract static class DummySourceContext - implements SourceFunction.SourceContext { - - private final Object lock = new Object(); - - @Override - public void collectWithTimestamp(String element, long timestamp) {} - - @Override - public void emitWatermark(Watermark mark) {} - - @Override - public Object getCheckpointLock() { - return lock; - } - - @Override - public void close() {} - - @Override - public void markAsTemporarilyIdle() {} - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumerBaseTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumerBaseTest.java deleted file mode 100644 index a2438165e..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumerBaseTest.java +++ /dev/null @@ -1,1523 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.api.common.ExecutionConfig; -import org.apache.flink.api.common.eventtime.WatermarkStrategy; -import org.apache.flink.api.common.state.BroadcastState; -import org.apache.flink.api.common.state.KeyedStateStore; -import org.apache.flink.api.common.state.ListState; -import org.apache.flink.api.common.state.ListStateDescriptor; -import org.apache.flink.api.common.state.MapStateDescriptor; -import org.apache.flink.api.common.state.OperatorStateStore; -import org.apache.flink.api.common.typeinfo.TypeHint; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.api.java.tuple.Tuple2; -import org.apache.flink.api.java.typeutils.runtime.TupleSerializer; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.core.testutils.CheckedThread; -import org.apache.flink.core.testutils.OneShotLatch; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; -import org.apache.flink.runtime.checkpoint.OperatorSubtaskState; -import org.apache.flink.runtime.state.FunctionInitializationContext; -import org.apache.flink.runtime.state.StateSnapshotContextSynchronousImpl; -import org.apache.flink.streaming.api.TimeCharacteristic; -import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; -import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.api.operators.StreamSource; -import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; -import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; -import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; -import org.apache.flink.streaming.connectors.kafka.internals.AbstractPartitionDiscoverer; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaCommitCallback; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaDeserializationSchemaWrapper; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicsDescriptor; -import org.apache.flink.streaming.connectors.kafka.testutils.TestPartitionDiscoverer; -import org.apache.flink.streaming.connectors.kafka.testutils.TestSourceContext; -import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; -import org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService; -import org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness; -import org.apache.flink.streaming.util.MockDeserializationSchema; -import org.apache.flink.streaming.util.MockStreamingRuntimeContext; -import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema; -import org.apache.flink.util.ExceptionUtils; -import org.apache.flink.util.FlinkException; -import org.apache.flink.util.InstantiationUtil; -import org.apache.flink.util.Preconditions; -import org.apache.flink.util.SerializedValue; -import org.apache.flink.util.TestLogger; -import org.apache.flink.util.function.SupplierWithException; -import org.apache.flink.util.function.ThrowingRunnable; - -import org.junit.Test; - -import javax.annotation.Nonnull; - -import java.io.Serializable; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Arrays; -import 
java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.OptionalLong; -import java.util.Set; -import java.util.concurrent.CompletableFuture; -import java.util.stream.Collectors; - -import static org.apache.flink.util.Preconditions.checkState; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.assertj.core.api.Assertions.fail; -import static org.assertj.core.api.HamcrestCondition.matching; -import static org.hamcrest.Matchers.everyItem; -import static org.hamcrest.Matchers.hasSize; -import static org.hamcrest.collection.IsIn.isIn; -import static org.hamcrest.collection.IsMapContaining.hasKey; -import static org.hamcrest.core.IsNot.not; -import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.mock; - -/** Tests for the {@link FlinkKafkaConsumerBase}. */ -public class FlinkKafkaConsumerBaseTest extends TestLogger { - - private static final int maxParallelism = Short.MAX_VALUE / 2; - - /** Tests that not both types of timestamp extractors / watermark generators can be used. */ - @Test - @SuppressWarnings("unchecked") - public void testEitherWatermarkExtractor() { - assertThatThrownBy( - () -> - new DummyFlinkKafkaConsumer() - .assignTimestampsAndWatermarks( - (AssignerWithPeriodicWatermarks) null)) - .isInstanceOf(NullPointerException.class); - - assertThatThrownBy( - () -> - new DummyFlinkKafkaConsumer() - .assignTimestampsAndWatermarks( - (AssignerWithPunctuatedWatermarks) null)) - .isInstanceOf(NullPointerException.class); - - final AssignerWithPeriodicWatermarks periodicAssigner = - mock(AssignerWithPeriodicWatermarks.class); - final AssignerWithPunctuatedWatermarks punctuatedAssigner = - mock(AssignerWithPunctuatedWatermarks.class); - - DummyFlinkKafkaConsumer c1 = new DummyFlinkKafkaConsumer<>(); - c1.assignTimestampsAndWatermarks(periodicAssigner); - assertThatThrownBy(() -> c1.assignTimestampsAndWatermarks(punctuatedAssigner)) - .isInstanceOf(IllegalStateException.class); - - DummyFlinkKafkaConsumer c2 = new DummyFlinkKafkaConsumer<>(); - c2.assignTimestampsAndWatermarks(punctuatedAssigner); - assertThatThrownBy(() -> c2.assignTimestampsAndWatermarks(periodicAssigner)) - .isInstanceOf(IllegalStateException.class); - } - - /** Tests that no checkpoints happen when the fetcher is not running. */ - @Test - public void ignoreCheckpointWhenNotRunning() throws Exception { - @SuppressWarnings("unchecked") - final MockFetcher fetcher = new MockFetcher<>(); - final FlinkKafkaConsumerBase consumer = - new DummyFlinkKafkaConsumer<>( - fetcher, mock(AbstractPartitionDiscoverer.class), false); - - final TestingListState> listState = - new TestingListState<>(); - setupConsumer(consumer, false, listState, true, 0, 1); - - // snapshot before the fetcher starts running - consumer.snapshotState(new StateSnapshotContextSynchronousImpl(1, 1)); - - // no state should have been checkpointed - assertThat(listState.get().iterator().hasNext()).isFalse(); - - // acknowledgement of the checkpoint should also not result in any offset commits - consumer.notifyCheckpointComplete(1L); - assertThat(fetcher.getAndClearLastCommittedOffsets()).isNull(); - assertThat(fetcher.getCommitCount()).isEqualTo(0); - } - - /** - * Tests that when taking a checkpoint when the fetcher is not running yet, the checkpoint - * correctly contains the restored state instead. 
- */ - @Test - public void checkRestoredCheckpointWhenFetcherNotReady() throws Exception { - @SuppressWarnings("unchecked") - final FlinkKafkaConsumerBase consumer = new DummyFlinkKafkaConsumer<>(); - - final TestingListState> restoredListState = - new TestingListState<>(); - setupConsumer(consumer, true, restoredListState, true, 0, 1); - - // snapshot before the fetcher starts running - consumer.snapshotState(new StateSnapshotContextSynchronousImpl(17, 17)); - - // ensure that the list was cleared and refilled. while this is an implementation detail, we - // use it here - // to figure out that snapshotState() actually did something. - assertThat(restoredListState.isClearCalled()).isTrue(); - - Set expected = new HashSet<>(); - - for (Serializable serializable : restoredListState.get()) { - expected.add(serializable); - } - - int counter = 0; - - for (Serializable serializable : restoredListState.get()) { - assertThat(expected).contains(serializable); - counter++; - } - - assertThat(counter).isEqualTo(expected.size()); - } - - @Test - public void testConfigureOnCheckpointsCommitMode() throws Exception { - @SuppressWarnings("unchecked") - // auto-commit enabled; this should be ignored in this case - final DummyFlinkKafkaConsumer consumer = new DummyFlinkKafkaConsumer<>(true); - - setupConsumer( - consumer, false, null, true, // enable checkpointing; auto commit should be ignored - 0, 1); - - assertThat(consumer.getOffsetCommitMode()).isEqualTo(OffsetCommitMode.ON_CHECKPOINTS); - } - - @Test - public void testConfigureAutoCommitMode() throws Exception { - @SuppressWarnings("unchecked") - final DummyFlinkKafkaConsumer consumer = new DummyFlinkKafkaConsumer<>(true); - - setupConsumer(consumer); - - assertThat(consumer.getOffsetCommitMode()).isEqualTo(OffsetCommitMode.KAFKA_PERIODIC); - } - - @Test - public void testConfigureDisableOffsetCommitWithCheckpointing() throws Exception { - @SuppressWarnings("unchecked") - // auto-commit enabled; this should be ignored in this case - final DummyFlinkKafkaConsumer consumer = new DummyFlinkKafkaConsumer<>(true); - consumer.setCommitOffsetsOnCheckpoints( - false); // disabling offset committing should override everything - - setupConsumer( - consumer, false, null, true, // enable checkpointing; auto commit should be ignored - 0, 1); - - assertThat(consumer.getOffsetCommitMode()).isEqualTo(OffsetCommitMode.DISABLED); - } - - @Test - public void testConfigureDisableOffsetCommitWithoutCheckpointing() throws Exception { - @SuppressWarnings("unchecked") - final DummyFlinkKafkaConsumer consumer = new DummyFlinkKafkaConsumer<>(false); - - setupConsumer(consumer); - - assertThat(consumer.getOffsetCommitMode()).isEqualTo(OffsetCommitMode.DISABLED); - } - - /** - * Tests that subscribed partitions didn't change when there's no change on the initial topics. - * (filterRestoredPartitionsWithDiscovered is active) - */ - @Test - public void testSetFilterRestoredParitionsNoChange() throws Exception { - checkFilterRestoredPartitionsWithDisovered( - Arrays.asList(new String[] {"kafka_topic_1", "kafka_topic_2"}), - Arrays.asList(new String[] {"kafka_topic_1", "kafka_topic_2"}), - Arrays.asList(new String[] {"kafka_topic_1", "kafka_topic_2"}), - false); - } - - /** - * Tests that removed partitions will be removed from subscribed partitions Even if it's still - * in restored partitions. 
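The four testConfigure* cases above pin down how the legacy consumer derived its offset-commit mode from checkpointing, the commit-on-checkpoints flag, and Kafka's auto-commit property. The sketch below restates that decision table against the legacy OffsetCommitMode enum; it mirrors what the deleted tests assert rather than the removed implementation itself:

import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode;

// Restates the behaviour asserted by the deleted testConfigure* cases.
public final class OffsetCommitModeTable {

    static OffsetCommitMode resolve(
            boolean checkpointingEnabled,
            boolean commitOffsetsOnCheckpoints,
            boolean kafkaAutoCommitEnabled) {
        if (checkpointingEnabled) {
            // auto-commit is ignored once checkpointing is enabled
            return commitOffsetsOnCheckpoints
                    ? OffsetCommitMode.ON_CHECKPOINTS
                    : OffsetCommitMode.DISABLED;
        }
        // without checkpointing, only Kafka's periodic auto-commit remains
        return kafkaAutoCommitEnabled ? OffsetCommitMode.KAFKA_PERIODIC : OffsetCommitMode.DISABLED;
    }

    private OffsetCommitModeTable() {}
}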
(filterRestoredPartitionsWithDiscovered is active) - */ - @Test - public void testSetFilterRestoredParitionsWithRemovedTopic() throws Exception { - checkFilterRestoredPartitionsWithDisovered( - Arrays.asList(new String[] {"kafka_topic_1", "kafka_topic_2"}), - Arrays.asList(new String[] {"kafka_topic_1"}), - Arrays.asList(new String[] {"kafka_topic_1"}), - false); - } - - /** - * Tests that newly added partitions will be added to subscribed partitions. - * (filterRestoredPartitionsWithDiscovered is active) - */ - @Test - public void testSetFilterRestoredParitionsWithAddedTopic() throws Exception { - checkFilterRestoredPartitionsWithDisovered( - Arrays.asList(new String[] {"kafka_topic_1"}), - Arrays.asList(new String[] {"kafka_topic_1", "kafka_topic_2"}), - Arrays.asList(new String[] {"kafka_topic_1", "kafka_topic_2"}), - false); - } - - /** - * Tests that subscribed partitions are the same when there's no change on the initial topics. - * (filterRestoredPartitionsWithDiscovered is disabled) - */ - @Test - public void testDisableFilterRestoredParitionsNoChange() throws Exception { - checkFilterRestoredPartitionsWithDisovered( - Arrays.asList(new String[] {"kafka_topic_1", "kafka_topic_2"}), - Arrays.asList(new String[] {"kafka_topic_1", "kafka_topic_2"}), - Arrays.asList(new String[] {"kafka_topic_1", "kafka_topic_2"}), - true); - } - - /** - * Tests that removed partitions will not be removed from subscribed partitions Even if it's - * still in restored partitions. (filterRestoredPartitionsWithDiscovered is disabled) - */ - @Test - public void testDisableFilterRestoredParitionsWithRemovedTopic() throws Exception { - checkFilterRestoredPartitionsWithDisovered( - Arrays.asList(new String[] {"kafka_topic_1", "kafka_topic_2"}), - Arrays.asList(new String[] {"kafka_topic_1"}), - Arrays.asList(new String[] {"kafka_topic_1", "kafka_topic_2"}), - true); - } - - /** - * Tests that newly added partitions will be added to subscribed partitions. 
- * (filterRestoredPartitionsWithDiscovered is disabled) - */ - @Test - public void testDisableFilterRestoredParitionsWithAddedTopic() throws Exception { - checkFilterRestoredPartitionsWithDisovered( - Arrays.asList(new String[] {"kafka_topic_1"}), - Arrays.asList(new String[] {"kafka_topic_1", "kafka_topic_2"}), - Arrays.asList(new String[] {"kafka_topic_1", "kafka_topic_2"}), - true); - } - - private void checkFilterRestoredPartitionsWithDisovered( - List restoredKafkaTopics, - List initKafkaTopics, - List expectedSubscribedPartitions, - Boolean disableFiltering) - throws Exception { - final AbstractPartitionDiscoverer discoverer = - new TestPartitionDiscoverer( - new KafkaTopicsDescriptor(initKafkaTopics, null), - 0, - 1, - TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn( - initKafkaTopics), - TestPartitionDiscoverer - .createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn( - initKafkaTopics.stream() - .map(topic -> new KafkaTopicPartition(topic, 0)) - .collect(Collectors.toList()))); - final FlinkKafkaConsumerBase consumer = - new DummyFlinkKafkaConsumer<>(initKafkaTopics, discoverer); - if (disableFiltering) { - consumer.disableFilterRestoredPartitionsWithSubscribedTopics(); - } - - final TestingListState> listState = - new TestingListState<>(); - - for (int i = 0; i < restoredKafkaTopics.size(); i++) { - listState.add( - new Tuple2<>(new KafkaTopicPartition(restoredKafkaTopics.get(i), 0), 12345L)); - } - - setupConsumer(consumer, true, listState, true, 0, 1); - - Map subscribedPartitionsToStartOffsets = - consumer.getSubscribedPartitionsToStartOffsets(); - - assertThat( - subscribedPartitionsToStartOffsets.keySet().stream() - .map(partition -> partition.getTopic()) - .collect(Collectors.toSet())) - .isEqualTo(new HashSet<>(expectedSubscribedPartitions)); - } - - @Test - @SuppressWarnings("unchecked") - public void testSnapshotStateWithCommitOnCheckpointsEnabled() throws Exception { - - // -------------------------------------------------------------------- - // prepare fake states - // -------------------------------------------------------------------- - - final HashMap state1 = new HashMap<>(); - state1.put(new KafkaTopicPartition("abc", 13), 16768L); - state1.put(new KafkaTopicPartition("def", 7), 987654321L); - - final HashMap state2 = new HashMap<>(); - state2.put(new KafkaTopicPartition("abc", 13), 16770L); - state2.put(new KafkaTopicPartition("def", 7), 987654329L); - - final HashMap state3 = new HashMap<>(); - state3.put(new KafkaTopicPartition("abc", 13), 16780L); - state3.put(new KafkaTopicPartition("def", 7), 987654377L); - - // -------------------------------------------------------------------- - - final MockFetcher fetcher = new MockFetcher<>(state1, state2, state3); - - final FlinkKafkaConsumerBase consumer = - new DummyFlinkKafkaConsumer<>( - fetcher, mock(AbstractPartitionDiscoverer.class), false); - - final TestingListState listState = new TestingListState<>(); - - // setup and run the consumer; wait until the consumer reaches the main fetch loop before - // continuing test - setupConsumer(consumer, false, listState, true, 0, 1); - - final CheckedThread runThread = - new CheckedThread() { - @Override - public void go() throws Exception { - consumer.run(new TestSourceContext<>()); - } - }; - runThread.start(); - fetcher.waitUntilRun(); - - assertThat(consumer.getPendingOffsetsToCommit()).isEmpty(); - - // checkpoint 1 - consumer.snapshotState(new StateSnapshotContextSynchronousImpl(138, 138)); - - HashMap snapshot1 = new HashMap<>(); - - 
for (Serializable serializable : listState.get()) { - Tuple2 kafkaTopicPartitionLongTuple2 = - (Tuple2) serializable; - snapshot1.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1); - } - - assertThat(snapshot1).isEqualTo(state1); - assertThat(consumer.getPendingOffsetsToCommit()).hasSize(1); - assertThat(consumer.getPendingOffsetsToCommit().get(138L)).isEqualTo(state1); - - // checkpoint 2 - consumer.snapshotState(new StateSnapshotContextSynchronousImpl(140, 140)); - - HashMap snapshot2 = new HashMap<>(); - - for (Serializable serializable : listState.get()) { - Tuple2 kafkaTopicPartitionLongTuple2 = - (Tuple2) serializable; - snapshot2.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1); - } - - assertThat(snapshot2).isEqualTo(state2); - assertThat(consumer.getPendingOffsetsToCommit()).hasSize(2); - assertThat(consumer.getPendingOffsetsToCommit().get(140L)).isEqualTo(state2); - - // ack checkpoint 1 - consumer.notifyCheckpointComplete(138L); - assertThat(consumer.getPendingOffsetsToCommit()).hasSize(1); - assertThat(consumer.getPendingOffsetsToCommit()).containsKey(140L); - assertThat(fetcher.getAndClearLastCommittedOffsets()).isEqualTo(state1); - assertThat(fetcher.getCommitCount()).isEqualTo(1); - - // checkpoint 3 - consumer.snapshotState(new StateSnapshotContextSynchronousImpl(141, 141)); - - HashMap snapshot3 = new HashMap<>(); - - for (Serializable serializable : listState.get()) { - Tuple2 kafkaTopicPartitionLongTuple2 = - (Tuple2) serializable; - snapshot3.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1); - } - - assertThat(snapshot3).isEqualTo(state3); - assertThat(consumer.getPendingOffsetsToCommit()).hasSize(2); - assertThat(consumer.getPendingOffsetsToCommit().get(141L)).isEqualTo(state3); - - // ack checkpoint 3, subsumes number 2 - consumer.notifyCheckpointComplete(141L); - assertThat(consumer.getPendingOffsetsToCommit()).isEmpty(); - assertThat(fetcher.getAndClearLastCommittedOffsets()).isEqualTo(state3); - assertThat(fetcher.getCommitCount()).isEqualTo(2); - - consumer.notifyCheckpointComplete(666); // invalid checkpoint - assertThat(consumer.getPendingOffsetsToCommit()).isEmpty(); - assertThat(fetcher.getAndClearLastCommittedOffsets()).isNull(); - assertThat(fetcher.getCommitCount()).isEqualTo(2); - - consumer.cancel(); - runThread.sync(); - } - - @Test - @SuppressWarnings("unchecked") - public void testSnapshotStateWithCommitOnCheckpointsDisabled() throws Exception { - // -------------------------------------------------------------------- - // prepare fake states - // -------------------------------------------------------------------- - - final HashMap state1 = new HashMap<>(); - state1.put(new KafkaTopicPartition("abc", 13), 16768L); - state1.put(new KafkaTopicPartition("def", 7), 987654321L); - - final HashMap state2 = new HashMap<>(); - state2.put(new KafkaTopicPartition("abc", 13), 16770L); - state2.put(new KafkaTopicPartition("def", 7), 987654329L); - - final HashMap state3 = new HashMap<>(); - state3.put(new KafkaTopicPartition("abc", 13), 16780L); - state3.put(new KafkaTopicPartition("def", 7), 987654377L); - - // -------------------------------------------------------------------- - - final MockFetcher fetcher = new MockFetcher<>(state1, state2, state3); - - final FlinkKafkaConsumerBase consumer = - new DummyFlinkKafkaConsumer<>( - fetcher, mock(AbstractPartitionDiscoverer.class), false); - consumer.setCommitOffsetsOnCheckpoints(false); // disable offset committing - - final TestingListState 
listState = new TestingListState<>(); - - // setup and run the consumer; wait until the consumer reaches the main fetch loop before - // continuing test - setupConsumer(consumer, false, listState, true, 0, 1); - - final CheckedThread runThread = - new CheckedThread() { - @Override - public void go() throws Exception { - consumer.run(new TestSourceContext<>()); - } - }; - runThread.start(); - fetcher.waitUntilRun(); - - assertThat(consumer.getPendingOffsetsToCommit()).isEmpty(); - - // checkpoint 1 - consumer.snapshotState(new StateSnapshotContextSynchronousImpl(138, 138)); - - HashMap snapshot1 = new HashMap<>(); - - for (Serializable serializable : listState.get()) { - Tuple2 kafkaTopicPartitionLongTuple2 = - (Tuple2) serializable; - snapshot1.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1); - } - - assertThat(snapshot1).isEqualTo(state1); - assertThat(consumer.getPendingOffsetsToCommit().size()) - .isEqualTo(0); // pending offsets to commit should not be updated - - // checkpoint 2 - consumer.snapshotState(new StateSnapshotContextSynchronousImpl(140, 140)); - - HashMap snapshot2 = new HashMap<>(); - - for (Serializable serializable : listState.get()) { - Tuple2 kafkaTopicPartitionLongTuple2 = - (Tuple2) serializable; - snapshot2.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1); - } - - assertThat(snapshot2).isEqualTo(state2); - assertThat(consumer.getPendingOffsetsToCommit().size()) - .isEqualTo(0); // pending offsets to commit should not be updated - - // ack checkpoint 1 - consumer.notifyCheckpointComplete(138L); - assertThat(fetcher.getCommitCount()).isEqualTo(0); - assertThat(fetcher.getAndClearLastCommittedOffsets()) - .isNull(); // no offsets should be committed - - // checkpoint 3 - consumer.snapshotState(new StateSnapshotContextSynchronousImpl(141, 141)); - - HashMap snapshot3 = new HashMap<>(); - - for (Serializable serializable : listState.get()) { - Tuple2 kafkaTopicPartitionLongTuple2 = - (Tuple2) serializable; - snapshot3.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1); - } - - assertThat(snapshot3).isEqualTo(state3); - assertThat(consumer.getPendingOffsetsToCommit().size()) - .isEqualTo(0); // pending offsets to commit should not be updated - - // ack checkpoint 3, subsumes number 2 - consumer.notifyCheckpointComplete(141L); - assertThat(fetcher.getCommitCount()).isEqualTo(0); - assertThat(fetcher.getAndClearLastCommittedOffsets()) - .isNull(); // no offsets should be committed - - consumer.notifyCheckpointComplete(666); // invalid checkpoint - assertThat(fetcher.getCommitCount()).isEqualTo(0); - assertThat(fetcher.getAndClearLastCommittedOffsets()) - .isNull(); // no offsets should be committed - - consumer.cancel(); - runThread.sync(); - } - - @Test - public void testClosePartitionDiscovererWhenOpenThrowException() throws Exception { - final RuntimeException failureCause = - new RuntimeException(new FlinkException("Test partition discoverer exception")); - final FailingPartitionDiscoverer failingPartitionDiscoverer = - new FailingPartitionDiscoverer(failureCause); - - final DummyFlinkKafkaConsumer consumer = - new DummyFlinkKafkaConsumer<>(failingPartitionDiscoverer); - - testFailingConsumerLifecycle(consumer, failureCause); - assertThat(failingPartitionDiscoverer.isClosed()) - .as("partitionDiscoverer should be closed when consumer is closed") - .isTrue(); - } - - @Test - public void testClosePartitionDiscovererWhenCreateKafkaFetcherFails() throws Exception { - final FlinkException failureCause = new 
FlinkException("Create Kafka fetcher failure."); - - final DummyPartitionDiscoverer testPartitionDiscoverer = new DummyPartitionDiscoverer(); - final DummyFlinkKafkaConsumer consumer = - new DummyFlinkKafkaConsumer<>( - () -> { - throw failureCause; - }, - testPartitionDiscoverer, - 100L); - - testFailingConsumerLifecycle(consumer, failureCause); - assertThat(testPartitionDiscoverer.isClosed()) - .as("partitionDiscoverer should be closed when consumer is closed") - .isTrue(); - } - - @Test - public void testClosePartitionDiscovererWhenKafkaFetcherFails() throws Exception { - final FlinkException failureCause = new FlinkException("Run Kafka fetcher failure."); - - // in this scenario, the partition discoverer will be concurrently accessed; - // use the WakeupBeforeCloseTestingPartitionDiscoverer to verify that we always call - // wakeup() before closing the discoverer - final WakeupBeforeCloseTestingPartitionDiscoverer testPartitionDiscoverer = - new WakeupBeforeCloseTestingPartitionDiscoverer(); - - final AbstractFetcher mock = - (AbstractFetcher) mock(AbstractFetcher.class); - doThrow(failureCause).when(mock).runFetchLoop(); - - final DummyFlinkKafkaConsumer consumer = - new DummyFlinkKafkaConsumer<>(() -> mock, testPartitionDiscoverer, 100L); - - testFailingConsumerLifecycle(consumer, failureCause); - assertThat(testPartitionDiscoverer.isClosed()) - .as("partitionDiscoverer should be closed when consumer is closed") - .isTrue(); - } - - private void testFailingConsumerLifecycle( - FlinkKafkaConsumerBase testKafkaConsumer, @Nonnull Exception expectedException) - throws Exception { - try { - setupConsumer(testKafkaConsumer); - testKafkaConsumer.run(new TestSourceContext<>()); - - fail( - "Exception should have been thrown from open / run method of FlinkKafkaConsumerBase."); - } catch (Exception e) { - assertThat( - ExceptionUtils.findThrowable( - e, throwable -> throwable.equals(expectedException))) - .isPresent(); - } - testKafkaConsumer.close(); - } - - @Test - public void testClosePartitionDiscovererWithCancellation() throws Exception { - final DummyPartitionDiscoverer testPartitionDiscoverer = new DummyPartitionDiscoverer(); - - final TestingFlinkKafkaConsumer consumer = - new TestingFlinkKafkaConsumer<>(testPartitionDiscoverer, 100L); - - testNormalConsumerLifecycle(consumer); - assertThat(testPartitionDiscoverer.isClosed()) - .as("partitionDiscoverer should be closed when consumer is closed") - .isTrue(); - } - - private void testNormalConsumerLifecycle(FlinkKafkaConsumerBase testKafkaConsumer) - throws Exception { - setupConsumer(testKafkaConsumer); - final CompletableFuture runFuture = - CompletableFuture.runAsync( - ThrowingRunnable.unchecked( - () -> testKafkaConsumer.run(new TestSourceContext<>()))); - testKafkaConsumer.close(); - runFuture.get(); - } - - private void setupConsumer(FlinkKafkaConsumerBase consumer) throws Exception { - setupConsumer(consumer, false, null, false, 0, 1); - } - - /** - * Before using an explicit TypeSerializer for the partition state the {@link - * FlinkKafkaConsumerBase} was creating a serializer using a {@link TypeHint}. Here, we verify - * that the two methods create compatible serializers. 
- */ - @Test - public void testExplicitStateSerializerCompatibility() throws Exception { - ExecutionConfig executionConfig = new ExecutionConfig(); - - Tuple2 tuple = - new Tuple2<>(new KafkaTopicPartition("dummy", 0), 42L); - - // This is how the KafkaConsumerBase used to create the TypeSerializer - TypeInformation> originalTypeHintTypeInfo = - new TypeHint>() {}.getTypeInfo(); - TypeSerializer> serializerFromTypeHint = - originalTypeHintTypeInfo.createSerializer(executionConfig); - byte[] bytes = InstantiationUtil.serializeToByteArray(serializerFromTypeHint, tuple); - - // Directly use the Consumer to create the TypeSerializer (using the new method) - TupleSerializer> kafkaConsumerSerializer = - FlinkKafkaConsumerBase.createStateSerializer(executionConfig); - Tuple2 actualTuple = - InstantiationUtil.deserializeFromByteArray(kafkaConsumerSerializer, bytes); - - assertThat(actualTuple) - .as( - "Explicit Serializer is not compatible with previous method of creating Serializer using TypeHint.") - .isEqualTo(tuple); - } - - @Test - public void testScaleUp() throws Exception { - testRescaling(5, 2, 8, 30); - } - - @Test - public void testScaleDown() throws Exception { - testRescaling(5, 10, 2, 100); - } - - /** - * Tests whether the Kafka consumer behaves correctly when scaling the parallelism up/down, - * which means that operator state is being reshuffled. - * - *
    This also verifies that a restoring source is always impervious to changes in the list of - * topics fetched from Kafka. - */ - @SuppressWarnings("unchecked") - private void testRescaling( - final int initialParallelism, - final int numPartitions, - final int restoredParallelism, - final int restoredNumPartitions) - throws Exception { - - Preconditions.checkArgument( - restoredNumPartitions >= numPartitions, - "invalid test case for Kafka repartitioning; Kafka only allows increasing partitions."); - - List mockFetchedPartitionsOnStartup = new ArrayList<>(); - for (int i = 0; i < numPartitions; i++) { - mockFetchedPartitionsOnStartup.add(new KafkaTopicPartition("test-topic", i)); - } - - DummyFlinkKafkaConsumer[] consumers = - new DummyFlinkKafkaConsumer[initialParallelism]; - - AbstractStreamOperatorTestHarness[] testHarnesses = - new AbstractStreamOperatorTestHarness[initialParallelism]; - - List testTopics = Collections.singletonList("test-topic"); - - for (int i = 0; i < initialParallelism; i++) { - TestPartitionDiscoverer partitionDiscoverer = - new TestPartitionDiscoverer( - new KafkaTopicsDescriptor(testTopics, null), - i, - initialParallelism, - TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn( - testTopics), - TestPartitionDiscoverer - .createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn( - mockFetchedPartitionsOnStartup)); - - consumers[i] = new DummyFlinkKafkaConsumer<>(testTopics, partitionDiscoverer); - testHarnesses[i] = createTestHarness(consumers[i], initialParallelism, i); - - // initializeState() is always called, null signals that we didn't restore - testHarnesses[i].initializeEmptyState(); - testHarnesses[i].open(); - } - - Map globalSubscribedPartitions = new HashMap<>(); - - for (int i = 0; i < initialParallelism; i++) { - Map subscribedPartitions = - consumers[i].getSubscribedPartitionsToStartOffsets(); - - // make sure that no one else is subscribed to these partitions - for (KafkaTopicPartition partition : subscribedPartitions.keySet()) { - assertThat(globalSubscribedPartitions).satisfies(matching(not(hasKey(partition)))); - } - globalSubscribedPartitions.putAll(subscribedPartitions); - } - - assertThat(globalSubscribedPartitions.values()).satisfies(matching(hasSize(numPartitions))); - assertThat(mockFetchedPartitionsOnStartup) - .satisfies(matching(everyItem(isIn(globalSubscribedPartitions.keySet())))); - - OperatorSubtaskState[] state = new OperatorSubtaskState[initialParallelism]; - - for (int i = 0; i < initialParallelism; i++) { - state[i] = testHarnesses[i].snapshot(0, 0); - } - - OperatorSubtaskState mergedState = AbstractStreamOperatorTestHarness.repackageState(state); - - // ----------------------------------------------------------------------------------------- - // restore - - List mockFetchedPartitionsAfterRestore = new ArrayList<>(); - for (int i = 0; i < restoredNumPartitions; i++) { - mockFetchedPartitionsAfterRestore.add(new KafkaTopicPartition("test-topic", i)); - } - - DummyFlinkKafkaConsumer[] restoredConsumers = - new DummyFlinkKafkaConsumer[restoredParallelism]; - - AbstractStreamOperatorTestHarness[] restoredTestHarnesses = - new AbstractStreamOperatorTestHarness[restoredParallelism]; - - for (int i = 0; i < restoredParallelism; i++) { - OperatorSubtaskState initState = - AbstractStreamOperatorTestHarness.repartitionOperatorState( - mergedState, - maxParallelism, - initialParallelism, - restoredParallelism, - i); - - TestPartitionDiscoverer partitionDiscoverer = - new TestPartitionDiscoverer( - new 
KafkaTopicsDescriptor(testTopics, null), - i, - restoredParallelism, - TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn( - testTopics), - TestPartitionDiscoverer - .createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn( - mockFetchedPartitionsAfterRestore)); - - restoredConsumers[i] = new DummyFlinkKafkaConsumer<>(testTopics, partitionDiscoverer); - restoredTestHarnesses[i] = - createTestHarness(restoredConsumers[i], restoredParallelism, i); - - // initializeState() is always called, null signals that we didn't restore - restoredTestHarnesses[i].initializeState(initState); - restoredTestHarnesses[i].open(); - } - - Map restoredGlobalSubscribedPartitions = new HashMap<>(); - - for (int i = 0; i < restoredParallelism; i++) { - Map subscribedPartitions = - restoredConsumers[i].getSubscribedPartitionsToStartOffsets(); - - // make sure that no one else is subscribed to these partitions - for (KafkaTopicPartition partition : subscribedPartitions.keySet()) { - assertThat(restoredGlobalSubscribedPartitions) - .satisfies(matching(not(hasKey(partition)))); - } - restoredGlobalSubscribedPartitions.putAll(subscribedPartitions); - } - - assertThat(restoredGlobalSubscribedPartitions.values()) - .satisfies(matching(hasSize(restoredNumPartitions))); - assertThat(mockFetchedPartitionsOnStartup) - .satisfies(matching(everyItem(isIn(restoredGlobalSubscribedPartitions.keySet())))); - } - - @Test - public void testOpen() throws Exception { - MockDeserializationSchema deserializationSchema = new MockDeserializationSchema<>(); - - AbstractStreamOperatorTestHarness testHarness = - createTestHarness( - new DummyFlinkKafkaConsumer<>( - new KafkaDeserializationSchemaWrapper<>(deserializationSchema)), - 1, - 0); - - testHarness.open(); - assertThat(deserializationSchema.isOpenCalled()).as("Open method was not called").isTrue(); - } - - @Test - public void testOpenWithRestoreState() throws Exception { - MockDeserializationSchema deserializationSchema = new MockDeserializationSchema<>(); - final FlinkKafkaConsumerBase consumer = - new DummyFlinkKafkaConsumer<>( - new KafkaDeserializationSchemaWrapper<>(deserializationSchema)); - - final TestingListState> restoredListState = - new TestingListState<>(); - setupConsumer(consumer, true, restoredListState, true, 0, 1); - - assertThat(deserializationSchema.isOpenCalled()) - .as("DeserializationSchema's open method was not invoked") - .isTrue(); - } - - // ------------------------------------------------------------------------ - - private static AbstractStreamOperatorTestHarness createTestHarness( - SourceFunction source, int numSubtasks, int subtaskIndex) throws Exception { - - AbstractStreamOperatorTestHarness testHarness = - new AbstractStreamOperatorTestHarness<>( - new StreamSource<>(source), maxParallelism, numSubtasks, subtaskIndex); - - testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime); - - return testHarness; - } - - // ------------------------------------------------------------------------ - - /** - * A dummy partition discoverer that always throws an exception from discoverPartitions() - * method. 
- */ - private static class FailingPartitionDiscoverer extends AbstractPartitionDiscoverer { - - private volatile boolean closed = false; - - private final RuntimeException failureCause; - - public FailingPartitionDiscoverer(RuntimeException failureCause) { - super(new KafkaTopicsDescriptor(Arrays.asList("foo"), null), 0, 1); - this.failureCause = failureCause; - } - - @Override - protected void initializeConnections() throws Exception { - closed = false; - } - - @Override - protected void wakeupConnections() {} - - @Override - protected void closeConnections() throws Exception { - closed = true; - } - - @Override - protected List getAllTopics() throws WakeupException { - return null; - } - - @Override - protected List getAllPartitionsForTopics(List topics) - throws WakeupException { - return null; - } - - @Override - public List discoverPartitions() - throws WakeupException, ClosedException { - throw failureCause; - } - - public boolean isClosed() { - return closed; - } - } - - private static class WakeupBeforeCloseTestingPartitionDiscoverer - extends DummyPartitionDiscoverer { - @Override - protected void closeConnections() { - if (!isWakedUp()) { - fail("Partition discoverer should have been waked up first before closing."); - } - - super.closeConnections(); - } - } - - private static class DummyPartitionDiscoverer extends AbstractPartitionDiscoverer { - - private final List allTopics; - private final List allPartitions; - private volatile boolean closed = false; - private volatile boolean wakedUp = false; - - private DummyPartitionDiscoverer() { - super(new KafkaTopicsDescriptor(Collections.singletonList("foo"), null), 0, 1); - this.allTopics = Collections.singletonList("foo"); - this.allPartitions = Collections.singletonList(new KafkaTopicPartition("foo", 0)); - } - - @Override - protected void initializeConnections() { - // noop - } - - @Override - protected void wakeupConnections() { - wakedUp = true; - } - - @Override - protected void closeConnections() { - closed = true; - } - - @Override - protected List getAllTopics() throws WakeupException { - checkState(); - - return allTopics; - } - - @Override - protected List getAllPartitionsForTopics(List topics) - throws WakeupException { - checkState(); - return allPartitions; - } - - private void checkState() throws WakeupException { - if (wakedUp || closed) { - throw new WakeupException(); - } - } - - boolean isClosed() { - return closed; - } - - public boolean isWakedUp() { - return wakedUp; - } - } - - private static class TestingFetcher extends AbstractFetcher { - - private volatile boolean isRunning = true; - - protected TestingFetcher( - SourceFunction.SourceContext sourceContext, - Map seedPartitionsWithInitialOffsets, - SerializedValue> watermarkStrategy, - ProcessingTimeService processingTimeProvider, - long autoWatermarkInterval, - ClassLoader userCodeClassLoader, - MetricGroup consumerMetricGroup, - boolean useMetrics) - throws Exception { - super( - sourceContext, - seedPartitionsWithInitialOffsets, - watermarkStrategy, - processingTimeProvider, - autoWatermarkInterval, - userCodeClassLoader, - consumerMetricGroup, - useMetrics); - } - - @Override - public void runFetchLoop() throws Exception { - while (isRunning) { - Thread.sleep(10L); - } - } - - @Override - public void cancel() { - isRunning = false; - } - - @Override - protected void doCommitInternalOffsetsToKafka( - Map offsets, @Nonnull KafkaCommitCallback commitCallback) - throws Exception {} - - @Override - protected KPH createKafkaPartitionHandle(KafkaTopicPartition 
partition) { - return null; - } - } - - /** - * An instantiable dummy {@link FlinkKafkaConsumerBase} that supports injecting mocks for {@link - * FlinkKafkaConsumerBase#kafkaFetcher}, {@link FlinkKafkaConsumerBase#partitionDiscoverer}, and - * {@link FlinkKafkaConsumerBase#getIsAutoCommitEnabled()}. - */ - private static class DummyFlinkKafkaConsumer extends FlinkKafkaConsumerBase { - private static final long serialVersionUID = 1L; - - private SupplierWithException, Exception> testFetcherSupplier; - private AbstractPartitionDiscoverer testPartitionDiscoverer; - private boolean isAutoCommitEnabled; - - @SuppressWarnings("unchecked") - DummyFlinkKafkaConsumer() { - this(false); - } - - @SuppressWarnings("unchecked") - DummyFlinkKafkaConsumer(boolean isAutoCommitEnabled) { - this( - mock(AbstractFetcher.class), - mock(AbstractPartitionDiscoverer.class), - isAutoCommitEnabled); - } - - @SuppressWarnings("unchecked") - DummyFlinkKafkaConsumer(AbstractPartitionDiscoverer abstractPartitionDiscoverer) { - this(mock(AbstractFetcher.class), abstractPartitionDiscoverer, false); - } - - @SuppressWarnings("unchecked") - DummyFlinkKafkaConsumer(KafkaDeserializationSchema kafkaDeserializationSchema) { - this( - () -> mock(AbstractFetcher.class), - mock(AbstractPartitionDiscoverer.class), - false, - PARTITION_DISCOVERY_DISABLED, - Collections.singletonList("dummy-topic"), - kafkaDeserializationSchema); - } - - @SuppressWarnings("unchecked") - DummyFlinkKafkaConsumer( - List topics, AbstractPartitionDiscoverer abstractPartitionDiscoverer) { - this( - () -> mock(AbstractFetcher.class), - abstractPartitionDiscoverer, - false, - PARTITION_DISCOVERY_DISABLED, - topics, - (KeyedDeserializationSchema) mock(KeyedDeserializationSchema.class)); - } - - @SuppressWarnings("unchecked") - DummyFlinkKafkaConsumer( - SupplierWithException, Exception> abstractFetcherSupplier, - AbstractPartitionDiscoverer abstractPartitionDiscoverer, - long discoveryIntervalMillis) { - this( - abstractFetcherSupplier, - abstractPartitionDiscoverer, - false, - discoveryIntervalMillis); - } - - @SuppressWarnings("unchecked") - DummyFlinkKafkaConsumer( - AbstractFetcher testFetcher, - AbstractPartitionDiscoverer testPartitionDiscoverer, - boolean isAutoCommitEnabled) { - this( - testFetcher, - testPartitionDiscoverer, - isAutoCommitEnabled, - PARTITION_DISCOVERY_DISABLED); - } - - @SuppressWarnings("unchecked") - DummyFlinkKafkaConsumer( - AbstractFetcher testFetcher, - AbstractPartitionDiscoverer testPartitionDiscoverer, - boolean isAutoCommitEnabled, - long discoveryIntervalMillis) { - this( - () -> testFetcher, - testPartitionDiscoverer, - isAutoCommitEnabled, - discoveryIntervalMillis); - } - - @SuppressWarnings("unchecked") - DummyFlinkKafkaConsumer( - SupplierWithException, Exception> testFetcherSupplier, - AbstractPartitionDiscoverer testPartitionDiscoverer, - boolean isAutoCommitEnabled, - long discoveryIntervalMillis) { - this( - testFetcherSupplier, - testPartitionDiscoverer, - isAutoCommitEnabled, - discoveryIntervalMillis, - Collections.singletonList("dummy-topic"), - (KeyedDeserializationSchema) mock(KeyedDeserializationSchema.class)); - } - - @SuppressWarnings("unchecked") - DummyFlinkKafkaConsumer( - SupplierWithException, Exception> testFetcherSupplier, - AbstractPartitionDiscoverer testPartitionDiscoverer, - boolean isAutoCommitEnabled, - long discoveryIntervalMillis, - List topics, - KafkaDeserializationSchema mock) { - - super(topics, null, mock, discoveryIntervalMillis, false); - - this.testFetcherSupplier = 
testFetcherSupplier; - this.testPartitionDiscoverer = testPartitionDiscoverer; - this.isAutoCommitEnabled = isAutoCommitEnabled; - } - - @Override - protected AbstractFetcher createFetcher( - SourceContext sourceContext, - Map thisSubtaskPartitionsWithStartOffsets, - SerializedValue> watermarkStrategy, - StreamingRuntimeContext runtimeContext, - OffsetCommitMode offsetCommitMode, - MetricGroup consumerMetricGroup, - boolean useMetrics) - throws Exception { - return testFetcherSupplier.get(); - } - - @Override - protected AbstractPartitionDiscoverer createPartitionDiscoverer( - KafkaTopicsDescriptor topicsDescriptor, - int indexOfThisSubtask, - int numParallelSubtasks) { - return this.testPartitionDiscoverer; - } - - @Override - protected boolean getIsAutoCommitEnabled() { - return isAutoCommitEnabled; - } - - @Override - protected Map fetchOffsetsWithTimestamp( - Collection partitions, long timestamp) { - throw new UnsupportedOperationException(); - } - } - - private static class TestingFlinkKafkaConsumer extends FlinkKafkaConsumerBase { - - private static final long serialVersionUID = 935384661907656996L; - - private final AbstractPartitionDiscoverer partitionDiscoverer; - - TestingFlinkKafkaConsumer( - final AbstractPartitionDiscoverer partitionDiscoverer, - long discoveryIntervalMillis) { - super( - Collections.singletonList("dummy-topic"), - null, - (KafkaDeserializationSchema) mock(KafkaDeserializationSchema.class), - discoveryIntervalMillis, - false); - this.partitionDiscoverer = partitionDiscoverer; - } - - @Override - protected AbstractFetcher createFetcher( - SourceContext sourceContext, - Map thisSubtaskPartitionsWithStartOffsets, - SerializedValue> watermarkStrategy, - StreamingRuntimeContext runtimeContext, - OffsetCommitMode offsetCommitMode, - MetricGroup consumerMetricGroup, - boolean useMetrics) - throws Exception { - return new TestingFetcher( - sourceContext, - thisSubtaskPartitionsWithStartOffsets, - watermarkStrategy, - runtimeContext.getProcessingTimeService(), - 0L, - getClass().getClassLoader(), - consumerMetricGroup, - useMetrics); - } - - @Override - protected AbstractPartitionDiscoverer createPartitionDiscoverer( - KafkaTopicsDescriptor topicsDescriptor, - int indexOfThisSubtask, - int numParallelSubtasks) { - return partitionDiscoverer; - } - - @Override - protected boolean getIsAutoCommitEnabled() { - return false; - } - - @Override - protected Map fetchOffsetsWithTimestamp( - Collection partitions, long timestamp) { - throw new UnsupportedOperationException("fetchOffsetsWithTimestamp is not supported"); - } - } - - private static final class TestingListState implements ListState { - - private final List list = new ArrayList<>(); - private boolean clearCalled = false; - - @Override - public void clear() { - list.clear(); - clearCalled = true; - } - - @Override - public Iterable get() throws Exception { - return list; - } - - @Override - public void add(T value) throws Exception { - Preconditions.checkNotNull(value, "You cannot add null to a ListState."); - list.add(value); - } - - public List getList() { - return list; - } - - boolean isClearCalled() { - return clearCalled; - } - - @Override - public void update(List values) throws Exception { - clear(); - - addAll(values); - } - - @Override - public void addAll(List values) throws Exception { - if (values != null) { - values.forEach( - v -> Preconditions.checkNotNull(v, "You cannot add null to a ListState.")); - - list.addAll(values); - } - } - } - - @SuppressWarnings("unchecked") - private static void 
setupConsumer( - FlinkKafkaConsumerBase consumer, - boolean isRestored, - ListState restoredListState, - boolean isCheckpointingEnabled, - int subtaskIndex, - int totalNumSubtasks) - throws Exception { - - // run setup procedure in operator life cycle - consumer.setRuntimeContext( - new MockStreamingRuntimeContext( - isCheckpointingEnabled, totalNumSubtasks, subtaskIndex)); - consumer.initializeState( - new MockFunctionInitializationContext( - isRestored, new MockOperatorStateStore(restoredListState))); - consumer.open(new Configuration()); - } - - private static class MockFetcher extends AbstractFetcher { - - private final OneShotLatch runLatch = new OneShotLatch(); - private final OneShotLatch stopLatch = new OneShotLatch(); - - private final ArrayDeque> stateSnapshotsToReturn = - new ArrayDeque<>(); - - private Map lastCommittedOffsets; - private int commitCount = 0; - - @SafeVarargs - private MockFetcher(HashMap... stateSnapshotsToReturn) - throws Exception { - super( - new TestSourceContext<>(), - new HashMap<>(), - null /* watermark strategy */, - new TestProcessingTimeService(), - 0, - MockFetcher.class.getClassLoader(), - new UnregisteredMetricsGroup(), - false); - - this.stateSnapshotsToReturn.addAll(Arrays.asList(stateSnapshotsToReturn)); - } - - @Override - protected void doCommitInternalOffsetsToKafka( - Map offsets, @Nonnull KafkaCommitCallback commitCallback) - throws Exception { - this.lastCommittedOffsets = offsets; - this.commitCount++; - commitCallback.onSuccess(); - } - - @Override - public void runFetchLoop() throws Exception { - runLatch.trigger(); - stopLatch.await(); - } - - @Override - public HashMap snapshotCurrentState() { - checkState(!stateSnapshotsToReturn.isEmpty()); - return stateSnapshotsToReturn.poll(); - } - - @Override - protected Object createKafkaPartitionHandle(KafkaTopicPartition partition) { - throw new UnsupportedOperationException(); - } - - @Override - public void cancel() { - stopLatch.trigger(); - } - - private void waitUntilRun() throws InterruptedException { - runLatch.await(); - } - - private Map getAndClearLastCommittedOffsets() { - Map offsets = this.lastCommittedOffsets; - this.lastCommittedOffsets = null; - return offsets; - } - - private int getCommitCount() { - return commitCount; - } - } - - private static class MockOperatorStateStore implements OperatorStateStore { - - private final ListState mockRestoredUnionListState; - - private MockOperatorStateStore(ListState restoredUnionListState) { - this.mockRestoredUnionListState = restoredUnionListState; - } - - @Override - @SuppressWarnings("unchecked") - public ListState getUnionListState(ListStateDescriptor stateDescriptor) - throws Exception { - return (ListState) mockRestoredUnionListState; - } - - @Override - public BroadcastState getBroadcastState( - MapStateDescriptor stateDescriptor) throws Exception { - throw new UnsupportedOperationException(); - } - - @Override - public ListState getListState(ListStateDescriptor stateDescriptor) - throws Exception { - throw new UnsupportedOperationException(); - } - - @Override - public Set getRegisteredStateNames() { - throw new UnsupportedOperationException(); - } - - @Override - public Set getRegisteredBroadcastStateNames() { - throw new UnsupportedOperationException(); - } - } - - private static class MockFunctionInitializationContext - implements FunctionInitializationContext { - - private final boolean isRestored; - private final OperatorStateStore operatorStateStore; - - private MockFunctionInitializationContext( - boolean isRestored, 
OperatorStateStore operatorStateStore) { - this.isRestored = isRestored; - this.operatorStateStore = operatorStateStore; - } - - @Override - public boolean isRestored() { - return isRestored; - } - - @Override - public OptionalLong getRestoredCheckpointId() { - return isRestored ? OptionalLong.of(1L) : OptionalLong.empty(); - } - - @Override - public OperatorStateStore getOperatorStateStore() { - return operatorStateStore; - } - - @Override - public KeyedStateStore getKeyedStateStore() { - throw new UnsupportedOperationException(); - } - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumerITCase.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumerITCase.java deleted file mode 100644 index 90c773730..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaConsumerITCase.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.api.common.ExecutionConfig; -import org.apache.flink.api.common.functions.MapFunction; -import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema; -import org.apache.flink.api.common.typeinfo.BasicTypeInfo; -import org.apache.flink.configuration.CheckpointingOptions; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.connector.kafka.testutils.KafkaSourceTestEnv; -import org.apache.flink.core.execution.JobClient; -import org.apache.flink.core.execution.SavepointFormatType; -import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration; -import org.apache.flink.streaming.api.datastream.DataStreamSource; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.streaming.api.functions.sink.DiscardingSink; -import org.apache.flink.test.util.MiniClusterWithClientResource; - -import org.apache.kafka.clients.consumer.ConsumerConfig; -import org.junit.ClassRule; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; -import org.junit.jupiter.api.TestInstance.Lifecycle; -import org.junit.jupiter.api.io.TempDir; - -import java.nio.file.Path; -import java.util.Properties; -import java.util.concurrent.CountDownLatch; - -/** ITCase tests class for {@link FlinkKafkaConsumer}. 
*/ -@TestInstance(Lifecycle.PER_CLASS) -public class FlinkKafkaConsumerITCase { - private static final String TOPIC1 = "FlinkKafkaConsumerITCase_topic1"; - - @ClassRule - public static final MiniClusterWithClientResource MINI_CLUSTER = - new MiniClusterWithClientResource( - new MiniClusterResourceConfiguration.Builder() - .setConfiguration(new Configuration()) - .build()); - - @BeforeAll - public void setup() throws Throwable { - KafkaSourceTestEnv.setup(); - KafkaSourceTestEnv.setupTopic( - TOPIC1, true, true, KafkaSourceTestEnv::getRecordsForTopicWithoutTimestamp); - } - - @AfterAll - public void tearDown() throws Exception { - KafkaSourceTestEnv.tearDown(); - } - - @Test - public void testStopWithSavepoint(@TempDir Path savepointsDir) throws Exception { - Configuration config = - new Configuration() - .set( - CheckpointingOptions.SAVEPOINT_DIRECTORY, - savepointsDir.toUri().toString()); - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(config); - env.setParallelism(1); - - Properties properties = new Properties(); - properties.setProperty( - ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, - KafkaSourceTestEnv.brokerConnectionStrings); - properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "testStopWithSavepoint"); - properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); - - FlinkKafkaConsumer kafkaConsumer = - new FlinkKafkaConsumer<>( - TOPIC1, - new TypeInformationSerializationSchema<>( - BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig()), - properties); - DataStreamSource stream = env.addSource(kafkaConsumer); - - ProgressLatchingIdentityFunction.resetBeforeUse(); - stream.map(new ProgressLatchingIdentityFunction()).addSink(new DiscardingSink<>()); - - JobClient jobClient = env.executeAsync(); - - ProgressLatchingIdentityFunction.getProgressLatch().await(); - - // Check that stopWithSavepoint completes successfully - jobClient.stopWithSavepoint(false, null, SavepointFormatType.CANONICAL).get(); - // TODO: ideally we should test recovery, that there were no data losses etc, but this - // is already a deprecated class, so I'm not adding new tests for that now. - } - - private static class ProgressLatchingIdentityFunction implements MapFunction { - - static CountDownLatch progressLatch; - - static void resetBeforeUse() { - progressLatch = new CountDownLatch(1); - } - - public static CountDownLatch getProgressLatch() { - return progressLatch; - } - - @Override - public Integer map(Integer integer) throws Exception { - progressLatch.countDown(); - return integer; - } - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaInternalProducerITCase.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaInternalProducerITCase.java deleted file mode 100644 index 15729a8c8..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaInternalProducerITCase.java +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.streaming.connectors.kafka.internals.FlinkKafkaInternalProducer; - -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.apache.kafka.clients.consumer.ConsumerRecords; -import org.apache.kafka.clients.consumer.KafkaConsumer; -import org.apache.kafka.clients.producer.Callback; -import org.apache.kafka.clients.producer.Producer; -import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.kafka.clients.producer.RecordMetadata; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; - -import java.time.Duration; -import java.util.Collections; -import java.util.Iterator; -import java.util.Properties; -import java.util.UUID; - -import static org.assertj.core.api.Assertions.assertThat; - -/** Tests for our own {@link FlinkKafkaInternalProducer}. */ -@SuppressWarnings("serial") -public class FlinkKafkaInternalProducerITCase extends KafkaTestBase { - protected String transactionalId; - protected Properties extraProperties; - private volatile Exception exceptionInCallback; - - @BeforeClass - public static void prepare() throws Exception { - LOG.info("-------------------------------------------------------------------------"); - LOG.info(" Starting KafkaTestBase "); - LOG.info("-------------------------------------------------------------------------"); - - Properties serverProperties = new Properties(); - serverProperties.put("transaction.state.log.num.partitions", Integer.toString(1)); - serverProperties.put("auto.leader.rebalance.enable", Boolean.toString(false)); - startClusters( - KafkaTestEnvironment.createConfig() - .setKafkaServersNumber(NUMBER_OF_KAFKA_SERVERS) - .setSecureMode(false) - .setHideKafkaBehindProxy(true) - .setKafkaServerProperties(serverProperties)); - } - - @Before - public void before() { - transactionalId = UUID.randomUUID().toString(); - extraProperties = new Properties(); - extraProperties.putAll(standardProps); - extraProperties.put("transactional.id", transactionalId); - extraProperties.put( - "key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); - extraProperties.put( - "value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); - extraProperties.put( - "key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); - extraProperties.put( - "value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); - extraProperties.put("isolation.level", "read_committed"); - } - - @Test(timeout = 60000L) - public void testHappyPath() throws Exception { - String topicName = "flink-kafka-producer-happy-path"; - - Producer kafkaProducer = new FlinkKafkaInternalProducer<>(extraProperties); - try { - kafkaProducer.initTransactions(); - kafkaProducer.beginTransaction(); - kafkaProducer.send( - new ProducerRecord<>(topicName, "42", "42"), new ErrorCheckingCallback()); - kafkaProducer.commitTransaction(); - } finally { - kafkaProducer.close(Duration.ofSeconds(5)); - } - assertThat(exceptionInCallback) - .as("The message should have been successfully sent") 
- .isNull(); - assertRecord(topicName, "42", "42"); - deleteTestTopic(topicName); - } - - @Test(timeout = 30000L) - public void testResumeTransaction() throws Exception { - String topicName = "flink-kafka-producer-resume-transaction"; - FlinkKafkaInternalProducer kafkaProducer = - new FlinkKafkaInternalProducer<>(extraProperties); - try { - kafkaProducer.initTransactions(); - kafkaProducer.beginTransaction(); - kafkaProducer.send( - new ProducerRecord<>(topicName, "42", "42"), new ErrorCheckingCallback()); - kafkaProducer.flush(); - assertThat(exceptionInCallback) - .as("The message should have been successfully sent") - .isNull(); - long producerId = kafkaProducer.getProducerId(); - short epoch = kafkaProducer.getEpoch(); - - FlinkKafkaInternalProducer resumeProducer = - new FlinkKafkaInternalProducer<>(extraProperties); - try { - resumeProducer.resumeTransaction(producerId, epoch); - resumeProducer.commitTransaction(); - } finally { - resumeProducer.close(Duration.ofSeconds(5)); - } - - assertRecord(topicName, "42", "42"); - - // this shouldn't throw - in case of network split, old producer might attempt to commit - // it's transaction - kafkaProducer.commitTransaction(); - - // this shouldn't fail also, for same reason as above - resumeProducer = new FlinkKafkaInternalProducer<>(extraProperties); - try { - resumeProducer.resumeTransaction(producerId, epoch); - resumeProducer.commitTransaction(); - } finally { - resumeProducer.close(Duration.ofSeconds(5)); - } - } finally { - kafkaProducer.close(Duration.ofSeconds(5)); - } - deleteTestTopic(topicName); - } - - @Test(timeout = 30000L, expected = IllegalStateException.class) - public void testPartitionsForAfterClosed() { - FlinkKafkaInternalProducer kafkaProducer = - new FlinkKafkaInternalProducer<>(extraProperties); - kafkaProducer.close(Duration.ofSeconds(5)); - kafkaProducer.partitionsFor("Topic"); - } - - @Test(timeout = 30000L, expected = IllegalStateException.class) - public void testInitTransactionsAfterClosed() { - FlinkKafkaInternalProducer kafkaProducer = - new FlinkKafkaInternalProducer<>(extraProperties); - kafkaProducer.close(Duration.ofSeconds(5)); - kafkaProducer.initTransactions(); - } - - @Test(timeout = 30000L, expected = IllegalStateException.class) - public void testBeginTransactionAfterClosed() { - FlinkKafkaInternalProducer kafkaProducer = - new FlinkKafkaInternalProducer<>(extraProperties); - kafkaProducer.initTransactions(); - kafkaProducer.close(Duration.ofSeconds(5)); - kafkaProducer.beginTransaction(); - } - - @Test(timeout = 30000L, expected = IllegalStateException.class) - public void testCommitTransactionAfterClosed() { - String topicName = "testCommitTransactionAfterClosed"; - FlinkKafkaInternalProducer kafkaProducer = getClosedProducer(topicName); - kafkaProducer.commitTransaction(); - } - - @Test(timeout = 30000L, expected = IllegalStateException.class) - public void testResumeTransactionAfterClosed() { - String topicName = "testAbortTransactionAfterClosed"; - FlinkKafkaInternalProducer kafkaProducer = getClosedProducer(topicName); - kafkaProducer.resumeTransaction(0L, (short) 1); - } - - @Test(timeout = 30000L, expected = IllegalStateException.class) - public void testAbortTransactionAfterClosed() { - String topicName = "testAbortTransactionAfterClosed"; - FlinkKafkaInternalProducer kafkaProducer = getClosedProducer(topicName); - kafkaProducer.abortTransaction(); - kafkaProducer.resumeTransaction(0L, (short) 1); - } - - @Test(timeout = 30000L, expected = IllegalStateException.class) - public void 
testFlushAfterClosed() { - String topicName = "testCommitTransactionAfterClosed"; - FlinkKafkaInternalProducer kafkaProducer = getClosedProducer(topicName); - kafkaProducer.flush(); - } - - @Test(timeout = 30000L) - public void testProducerWhenCommitEmptyPartitionsToOutdatedTxnCoordinator() throws Exception { - String topic = "flink-kafka-producer-txn-coordinator-changed-" + UUID.randomUUID(); - createTestTopic(topic, 1, 1); - Producer kafkaProducer = new FlinkKafkaInternalProducer<>(extraProperties); - try { - kafkaProducer.initTransactions(); - kafkaProducer.beginTransaction(); - restartBroker(kafkaServer.getLeaderToShutDown("__transaction_state")); - kafkaProducer.flush(); - kafkaProducer.commitTransaction(); - } finally { - kafkaProducer.close(Duration.ofSeconds(5)); - } - deleteTestTopic(topic); - } - - private FlinkKafkaInternalProducer getClosedProducer(String topicName) { - FlinkKafkaInternalProducer kafkaProducer = - new FlinkKafkaInternalProducer<>(extraProperties); - kafkaProducer.initTransactions(); - kafkaProducer.beginTransaction(); - kafkaProducer.send( - new ProducerRecord<>(topicName, "42", "42"), new ErrorCheckingCallback()); - kafkaProducer.close(Duration.ofSeconds(5)); - assertThat(exceptionInCallback) - .as("The message should have been successfully sent") - .isNull(); - return kafkaProducer; - } - - private void assertRecord(String topicName, String expectedKey, String expectedValue) { - try (KafkaConsumer kafkaConsumer = new KafkaConsumer<>(extraProperties)) { - kafkaConsumer.subscribe(Collections.singletonList(topicName)); - ConsumerRecords records = ConsumerRecords.empty(); - while (records.isEmpty()) { - records = kafkaConsumer.poll(Duration.ofMillis(10000)); - } - - final Iterator> it = records.iterator(); - ConsumerRecord record = it.next(); - assertThat(record.key()).isEqualTo(expectedKey); - assertThat(record.value()).isEqualTo(expectedValue); - assertThat(it.hasNext()).isFalse(); - } - } - - private void restartBroker(int brokerId) throws Exception { - kafkaServer.stopBroker(brokerId); - kafkaServer.restartBroker(brokerId); - } - - private class ErrorCheckingCallback implements Callback { - @Override - public void onCompletion(RecordMetadata metadata, Exception exception) { - exceptionInCallback = exception; - } - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducerITCase.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducerITCase.java deleted file mode 100644 index 8644a796c..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducerITCase.java +++ /dev/null @@ -1,820 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.api.common.ExecutionConfig; -import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema; -import org.apache.flink.api.common.typeinfo.BasicTypeInfo; -import org.apache.flink.api.common.typeutils.base.IntSerializer; -import org.apache.flink.runtime.checkpoint.OperatorSubtaskState; -import org.apache.flink.runtime.jobgraph.OperatorID; -import org.apache.flink.streaming.api.operators.StreamSink; -import org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper; -import org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness; -import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness; -import org.apache.flink.streaming.util.serialization.KeyedSerializationSchema; - -import org.apache.kafka.common.errors.ProducerFencedException; -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Test; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Optional; -import java.util.Properties; -import java.util.UUID; -import java.util.stream.Collectors; -import java.util.stream.IntStream; - -import static org.apache.flink.connector.kafka.testutils.KafkaUtil.checkProducerLeak; -import static org.apache.flink.util.ExceptionUtils.findThrowable; -import static org.apache.flink.util.Preconditions.checkState; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.assertj.core.api.Assertions.fail; - -/** - * IT cases for the {@link FlinkKafkaProducer}. - * - *
    Do not run this class in the same junit execution with other tests in your IDE. This may lead - * leaking threads. - */ -public class FlinkKafkaProducerITCase extends KafkaTestBase { - - protected String transactionalId; - protected Properties extraProperties; - - protected TypeInformationSerializationSchema integerSerializationSchema = - new TypeInformationSerializationSchema<>( - BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig()); - protected KeyedSerializationSchema integerKeyedSerializationSchema = - new KeyedSerializationSchemaWrapper<>(integerSerializationSchema); - - @Before - public void before() { - transactionalId = UUID.randomUUID().toString(); - extraProperties = new Properties(); - extraProperties.putAll(standardProps); - extraProperties.put("transactional.id", transactionalId); - extraProperties.put( - "key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); - extraProperties.put( - "value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); - extraProperties.put( - "key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); - extraProperties.put( - "value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); - extraProperties.put("isolation.level", "read_committed"); - } - - @Test - public void resourceCleanUpNone() throws Exception { - resourceCleanUp(FlinkKafkaProducer.Semantic.NONE); - } - - @Test - public void resourceCleanUpAtLeastOnce() throws Exception { - resourceCleanUp(FlinkKafkaProducer.Semantic.AT_LEAST_ONCE); - } - - /** This tests checks whether there is some resource leak in form of growing threads number. */ - public void resourceCleanUp(FlinkKafkaProducer.Semantic semantic) throws Exception { - String topic = "flink-kafka-producer-resource-cleanup-" + semantic; - - try (OneInputStreamOperatorTestHarness testHarness1 = - createTestHarness(topic, 1, 1, 0, semantic)) { - testHarness1.setup(); - testHarness1.open(); - testHarness1.snapshot(1L, 100L); - testHarness1.notifyOfCompletedCheckpoint(1L); - - // test the leak fixed by FLINK-36441 - testHarness1.getOneInputOperator().finish(); - testHarness1.snapshot(2L, 100L); - testHarness1.notifyOfCompletedCheckpoint(2L); - } - checkProducerLeak(); - } - - /** - * This test ensures that transactions reusing transactional.ids (after returning to the pool) - * will not clash with previous transactions using same transactional.ids. 
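The guarantee referred to here can be reproduced outside Flink with the plain Kafka client, which is what makes the id pool safe to recycle: initTransactions() on a new producer instance that reuses a transactional.id bumps the producer epoch, aborts whatever the earlier incarnation left open, and fences that earlier instance. A minimal sketch under those assumptions (broker address, topic name and id are placeholders, not values from this test):

    import java.time.Duration;
    import java.util.Properties;

    import org.apache.kafka.clients.producer.KafkaProducer;
    import org.apache.kafka.clients.producer.ProducerConfig;
    import org.apache.kafka.clients.producer.ProducerRecord;
    import org.apache.kafka.common.errors.ProducerFencedException;
    import org.apache.kafka.common.serialization.StringSerializer;

    public class TransactionalIdReuseSketch {
        public static void main(String[] args) {
            Properties props = new Properties();
            props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumed broker
            props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
            props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
            props.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, "pool-id-0"); // the id being reused

            KafkaProducer<String, String> first = new KafkaProducer<>(props);
            first.initTransactions();
            first.beginTransaction();
            first.send(new ProducerRecord<>("some-topic", "k", "v")); // deliberately left uncommitted

            // A new incarnation with the same transactional.id: initTransactions() aborts the
            // dangling transaction above and fences "first".
            KafkaProducer<String, String> second = new KafkaProducer<>(props);
            second.initTransactions();

            try {
                first.commitTransaction(); // the fenced producer can no longer commit
            } catch (ProducerFencedException expected) {
                // this is why an id returned to the pool cannot clash with its old transaction
            }

            first.close(Duration.ofSeconds(5));
            second.close(Duration.ofSeconds(5));
        }
    }

The deleted test exercises the same property through the connector's pool of transactional ids rather than through raw producers.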
- */ - @Test - public void testRestoreToCheckpointAfterExceedingProducersPool() throws Exception { - String topic = "flink-kafka-producer-fail-before-notify"; - - try (OneInputStreamOperatorTestHarness testHarness1 = - createTestHarness(topic)) { - testHarness1.setup(); - testHarness1.open(); - testHarness1.processElement(42, 0); - OperatorSubtaskState snapshot = testHarness1.snapshot(0, 0); - testHarness1.processElement(43, 0); - testHarness1.notifyOfCompletedCheckpoint(0); - try { - for (int i = 0; i < FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE; i++) { - testHarness1.snapshot(i + 1, 0); - testHarness1.processElement(i, 0); - } - throw new IllegalStateException("This should not be reached."); - } catch (Exception ex) { - if (!isCausedBy(FlinkKafkaErrorCode.PRODUCERS_POOL_EMPTY, ex)) { - throw ex; - } - } - - // Resume transactions before testHarness1 is being closed (in case of failures close() - // might not be called) - try (OneInputStreamOperatorTestHarness testHarness2 = - createTestHarness(topic)) { - testHarness2.setup(); - // restore from snapshot1, transactions with records 43 and 44 should be aborted - testHarness2.initializeState(snapshot); - testHarness2.open(); - } - - assertExactlyOnceForTopic(createProperties(), topic, Arrays.asList(42)); - deleteTestTopic(topic); - } catch (Exception ex) { - // testHarness1 will be fenced off after creating and closing testHarness2 - if (!findThrowable(ex, ProducerFencedException.class).isPresent()) { - throw ex; - } - } - checkProducerLeak(); - } - - /** This test hangs when running it in your IDE. */ - @Test - @Ignore - public void testFlinkKafkaProducerFailBeforeNotify() throws Exception { - String topic = "flink-kafka-producer-fail-before-notify"; - - final OneInputStreamOperatorTestHarness testHarness = - createTestHarness(topic); - - testHarness.setup(); - testHarness.open(); - testHarness.processElement(42, 0); - testHarness.snapshot(0, 1); - testHarness.processElement(43, 2); - OperatorSubtaskState snapshot = testHarness.snapshot(1, 3); - - int leaderId = kafkaServer.getLeaderToShutDown(topic); - failBroker(leaderId); - - assertThatThrownBy( - () -> { - testHarness.processElement(44, 4); - testHarness.snapshot(2, 5); - }) - .isInstanceOf(Exception.class); - - try { - testHarness.close(); - } catch (Exception ex) { - } - - kafkaServer.restartBroker(leaderId); - - final OneInputStreamOperatorTestHarness testHarness2 = - createTestHarness(topic); - testHarness2.setup(); - testHarness2.initializeState(snapshot); - testHarness2.close(); - - assertExactlyOnceForTopic(createProperties(), topic, Arrays.asList(42, 43)); - - deleteTestTopic(topic); - checkProducerLeak(); - } - - /** - * This tests checks whether FlinkKafkaProducer correctly aborts lingering transactions after a - * failure. If such transactions were left alone lingering it consumers would be unable to read - * committed records that were created after this lingering transaction. 
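The consumer-side effect this comment describes is the last-stable-offset rule: with isolation.level=read_committed a poll only returns records up to the partition's last stable offset, so one abandoned open transaction hides every committed record written after it. A minimal probe consumer illustrating that rule (broker address, topic and group id are placeholders):

    import java.time.Duration;
    import java.util.Collections;
    import java.util.Properties;

    import org.apache.kafka.clients.consumer.ConsumerConfig;
    import org.apache.kafka.clients.consumer.ConsumerRecord;
    import org.apache.kafka.clients.consumer.ConsumerRecords;
    import org.apache.kafka.clients.consumer.KafkaConsumer;
    import org.apache.kafka.common.serialization.IntegerDeserializer;

    public class ReadCommittedProbe {
        public static void main(String[] args) {
            Properties props = new Properties();
            props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumed broker
            props.put(ConsumerConfig.GROUP_ID_CONFIG, "lingering-txn-probe");      // assumed group
            props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
            props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
            props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, IntegerDeserializer.class.getName());
            props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, IntegerDeserializer.class.getName());

            try (KafkaConsumer<Integer, Integer> consumer = new KafkaConsumer<>(props)) {
                consumer.subscribe(Collections.singletonList("some-topic")); // assumed topic
                // If a failed producer left a transaction open on this partition, this poll stops
                // at the last stable offset and never surfaces the committed records written after
                // it - hence the connector must abort lingering transactions on recovery.
                ConsumerRecords<Integer, Integer> records = consumer.poll(Duration.ofSeconds(5));
                for (ConsumerRecord<Integer, Integer> record : records) {
                    System.out.println(record.offset() + " -> " + record.value());
                }
            }
        }
    }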
- */ - @Test - public void testFailBeforeNotifyAndResumeWorkAfterwards() throws Exception { - String topic = "flink-kafka-producer-fail-before-notify"; - - OneInputStreamOperatorTestHarness testHarness1 = createTestHarness(topic); - checkProducerLeak(); - testHarness1.setup(); - testHarness1.open(); - testHarness1.processElement(42, 0); - testHarness1.snapshot(0, 1); - testHarness1.processElement(43, 2); - OperatorSubtaskState snapshot1 = testHarness1.snapshot(1, 3); - - testHarness1.processElement(44, 4); - testHarness1.snapshot(2, 5); - testHarness1.processElement(45, 6); - - // do not close previous testHarness to make sure that closing do not clean up something (in - // case of failure - // there might not be any close) - OneInputStreamOperatorTestHarness testHarness2 = createTestHarness(topic); - testHarness2.setup(); - // restore from snapshot1, transactions with records 44 and 45 should be aborted - testHarness2.initializeState(snapshot1); - testHarness2.open(); - - // write and commit more records, after potentially lingering transactions - testHarness2.processElement(46, 7); - testHarness2.snapshot(4, 8); - testHarness2.processElement(47, 9); - testHarness2.notifyOfCompletedCheckpoint(4); - - // now we should have: - // - records 42 and 43 in committed transactions - // - aborted transactions with records 44 and 45 - // - committed transaction with record 46 - // - pending transaction with record 47 - assertExactlyOnceForTopic(createProperties(), topic, Arrays.asList(42, 43, 46)); - - try { - testHarness1.close(); - } catch (Exception e) { - // The only acceptable exception is ProducerFencedException because testHarness2 uses - // the same - // transactional ID. - if (!(e.getCause() instanceof ProducerFencedException)) { - fail("Received unexpected exception " + e); - } - } - testHarness2.close(); - deleteTestTopic(topic); - checkProducerLeak(); - } - - @Test - public void testFailAndRecoverSameCheckpointTwice() throws Exception { - String topic = "flink-kafka-producer-fail-and-recover-same-checkpoint-twice"; - - OperatorSubtaskState snapshot1; - try (OneInputStreamOperatorTestHarness testHarness = - createTestHarness(topic)) { - testHarness.setup(); - testHarness.open(); - testHarness.processElement(42, 0); - testHarness.snapshot(0, 1); - testHarness.processElement(43, 2); - snapshot1 = testHarness.snapshot(1, 3); - - testHarness.processElement(44, 4); - } - - try (OneInputStreamOperatorTestHarness testHarness = - createTestHarness(topic)) { - testHarness.setup(); - // restore from snapshot1, transactions with records 44 and 45 should be aborted - testHarness.initializeState(snapshot1); - testHarness.open(); - - // write and commit more records, after potentially lingering transactions - testHarness.processElement(44, 7); - testHarness.snapshot(2, 8); - testHarness.processElement(45, 9); - } - - try (OneInputStreamOperatorTestHarness testHarness = - createTestHarness(topic)) { - testHarness.setup(); - // restore from snapshot1, transactions with records 44 and 45 should be aborted - testHarness.initializeState(snapshot1); - testHarness.open(); - - // write and commit more records, after potentially lingering transactions - testHarness.processElement(44, 7); - testHarness.snapshot(3, 8); - testHarness.processElement(45, 9); - } - - // now we should have: - // - records 42 and 43 in committed transactions - // - aborted transactions with records 44 and 45 - assertExactlyOnceForTopic(createProperties(), topic, Arrays.asList(42, 43)); - deleteTestTopic(topic); - checkProducerLeak(); 
- } - - /** - * This tests checks whether FlinkKafkaProducer correctly aborts lingering transactions after a - * failure, which happened before first checkpoint and was followed up by reducing the - * parallelism. If such transactions were left alone lingering it consumers would be unable to - * read committed records that were created after this lingering transaction. - */ - @Test - public void testScaleDownBeforeFirstCheckpoint() throws Exception { - String topic = "scale-down-before-first-checkpoint"; - - List operatorsToClose = new ArrayList<>(); - int preScaleDownParallelism = Math.max(2, FlinkKafkaProducer.SAFE_SCALE_DOWN_FACTOR); - for (int subtaskIndex = 0; subtaskIndex < preScaleDownParallelism; subtaskIndex++) { - OneInputStreamOperatorTestHarness preScaleDownOperator = - createTestHarness( - topic, - preScaleDownParallelism, - preScaleDownParallelism, - subtaskIndex, - FlinkKafkaProducer.Semantic.EXACTLY_ONCE); - - preScaleDownOperator.setup(); - preScaleDownOperator.open(); - preScaleDownOperator.processElement(subtaskIndex * 2, 0); - preScaleDownOperator.snapshot(0, 1); - preScaleDownOperator.processElement(subtaskIndex * 2 + 1, 2); - - operatorsToClose.add(preScaleDownOperator); - } - - // do not close previous testHarnesses to make sure that closing do not clean up something - // (in case of failure - // there might not be any close) - - // After previous failure simulate restarting application with smaller parallelism - OneInputStreamOperatorTestHarness postScaleDownOperator1 = - createTestHarness(topic, 1, 1, 0, FlinkKafkaProducer.Semantic.EXACTLY_ONCE); - - postScaleDownOperator1.setup(); - postScaleDownOperator1.open(); - - // write and commit more records, after potentially lingering transactions - postScaleDownOperator1.processElement(46, 7); - postScaleDownOperator1.snapshot(4, 8); - postScaleDownOperator1.processElement(47, 9); - postScaleDownOperator1.notifyOfCompletedCheckpoint(4); - - // now we should have: - // - records 42, 43, 44 and 45 in aborted transactions - // - committed transaction with record 46 - // - pending transaction with record 47 - assertExactlyOnceForTopic(createProperties(), topic, Arrays.asList(46)); - - postScaleDownOperator1.close(); - // ignore ProducerFencedExceptions, because postScaleDownOperator1 could reuse transactional - // ids. - for (AutoCloseable operatorToClose : operatorsToClose) { - closeIgnoringProducerFenced(operatorToClose); - } - deleteTestTopic(topic); - checkProducerLeak(); - } - - /** - * Each instance of FlinkKafkaProducer uses it's own pool of transactional ids. After the - * restore from checkpoint transactional ids are redistributed across the subtasks. In case of - * scale down, the surplus transactional ids are dropped. In case of scale up, new one are - * generated (for the new subtasks). This test make sure that sequence of scaling down and up - * again works fine. Especially it checks whether the newly generated ids in scaling up do not - * overlap with ids that were used before scaling down. For example we start with 4 ids and - * parallelism 4: [1], [2], [3], [4] - one assigned per each subtask we scale down to - * parallelism 2: [1, 2], [3, 4] - first subtask got id 1 and 2, second got ids 3 and 4 surplus - * ids are dropped from the pools and we scale up to parallelism 3: [1 or 2], [3 or 4], [???] - * new subtask have to generate new id(s), but he can not use ids that are potentially in use, - * so it has to generate new ones that are greater then 4. 
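A worked version of the 4 -> 2 -> 3 example above, as a standalone illustration rather than the connector's actual id-generation code: ids 1-4 may still be referenced somewhere in the repartitioned state, so the subtask added when scaling back up has to draw its fresh ids strictly above the largest id it can observe. The helper below is hypothetical and exists only to make the arithmetic concrete.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class TransactionalIdPoolSketch {
        // Hypothetical helper, not the connector's implementation: a newly added subtask derives
        // a fresh, non-overlapping id pool strictly above everything recovered from state.
        static List<Integer> freshPool(List<Integer> idsSeenInState, int newSubtaskIndex, int poolSize) {
            int base = Collections.max(idsSeenInState) + 1;
            List<Integer> pool = new ArrayList<>(poolSize);
            for (int i = 0; i < poolSize; i++) {
                pool.add(base + newSubtaskIndex * poolSize + i);
            }
            return pool;
        }

        public static void main(String[] args) {
            // parallelism 4: [1] [2] [3] [4]; scale down to 2: [1, 2] [3, 4]
            List<Integer> recovered = Arrays.asList(1, 2, 3, 4);
            // scale up to 3: the new subtask (index 2) must not collide with ids 1-4
            System.out.println(freshPool(recovered, 2, 1)); // prints [7] - in any case > 4
        }
    }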
- */ - @Test - public void testScaleUpAfterScalingDown() throws Exception { - String topic = "scale-up-after-scaling-down"; - - final int parallelism1 = 4; - final int parallelism2 = 2; - final int parallelism3 = 3; - final int maxParallelism = Math.max(parallelism1, Math.max(parallelism2, parallelism3)); - - OperatorSubtaskState operatorSubtaskState = - repartitionAndExecute( - topic, - OperatorSubtaskState.builder().build(), - parallelism1, - parallelism1, - maxParallelism, - IntStream.range(0, parallelism1).boxed().iterator()); - - operatorSubtaskState = - repartitionAndExecute( - topic, - operatorSubtaskState, - parallelism1, - parallelism2, - maxParallelism, - IntStream.range(parallelism1, parallelism1 + parallelism2) - .boxed() - .iterator()); - - operatorSubtaskState = - repartitionAndExecute( - topic, - operatorSubtaskState, - parallelism2, - parallelism3, - maxParallelism, - IntStream.range( - parallelism1 + parallelism2, - parallelism1 + parallelism2 + parallelism3) - .boxed() - .iterator()); - - // After each previous repartitionAndExecute call, we are left with some lingering - // transactions, that would - // not allow us to read all committed messages from the topic. Thus we initialize operators - // from - // OperatorSubtaskState once more, but without any new data. This should terminate all - // ongoing transactions. - - repartitionAndExecute( - topic, - operatorSubtaskState, - parallelism3, - 1, - maxParallelism, - Collections.emptyIterator()); - - assertExactlyOnceForTopic( - createProperties(), - topic, - IntStream.range(0, parallelism1 + parallelism2 + parallelism3) - .boxed() - .collect(Collectors.toList())); - deleteTestTopic(topic); - checkProducerLeak(); - } - - private OperatorSubtaskState repartitionAndExecute( - String topic, - OperatorSubtaskState inputStates, - int oldParallelism, - int newParallelism, - int maxParallelism, - Iterator inputData) - throws Exception { - - List outputStates = new ArrayList<>(); - List> testHarnesses = new ArrayList<>(); - - for (int subtaskIndex = 0; subtaskIndex < newParallelism; subtaskIndex++) { - OperatorSubtaskState initState = - AbstractStreamOperatorTestHarness.repartitionOperatorState( - inputStates, - maxParallelism, - oldParallelism, - newParallelism, - subtaskIndex); - - OneInputStreamOperatorTestHarness testHarness = - createTestHarness( - topic, - maxParallelism, - newParallelism, - subtaskIndex, - FlinkKafkaProducer.Semantic.EXACTLY_ONCE); - testHarnesses.add(testHarness); - - testHarness.setup(); - - testHarness.initializeState(initState); - testHarness.open(); - - if (inputData.hasNext()) { - int nextValue = inputData.next(); - testHarness.processElement(nextValue, 0); - OperatorSubtaskState snapshot = testHarness.snapshot(0, 0); - - outputStates.add(snapshot); - checkState( - snapshot.getRawOperatorState().isEmpty(), "Unexpected raw operator state"); - checkState( - snapshot.getManagedKeyedState().isEmpty(), - "Unexpected managed keyed state"); - checkState(snapshot.getRawKeyedState().isEmpty(), "Unexpected raw keyed state"); - - for (int i = 1; i < FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE - 1; i++) { - testHarness.processElement(-nextValue, 0); - testHarness.snapshot(i, 0); - } - } - } - - for (OneInputStreamOperatorTestHarness testHarness : testHarnesses) { - testHarness.close(); - } - - return AbstractStreamOperatorTestHarness.repackageState( - outputStates.toArray(new OperatorSubtaskState[outputStates.size()])); - } - - @Test - public void testRecoverCommittedTransaction() throws Exception { - 
String topic = "flink-kafka-producer-recover-committed-transaction"; - - OneInputStreamOperatorTestHarness testHarness = createTestHarness(topic); - - testHarness.setup(); - testHarness.open(); // producerA - start transaction (txn) 0 - testHarness.processElement(42, 0); // producerA - write 42 in txn 0 - OperatorSubtaskState checkpoint0 = - testHarness.snapshot(0, 1); // producerA - pre commit txn 0, producerB - start txn 1 - testHarness.processElement(43, 2); // producerB - write 43 in txn 1 - testHarness.notifyOfCompletedCheckpoint( - 0); // producerA - commit txn 0 and return to the pool - testHarness.snapshot(1, 3); // producerB - pre txn 1, producerA - start txn 2 - testHarness.processElement(44, 4); // producerA - write 44 in txn 2 - testHarness.close(); // producerA - abort txn 2 - - testHarness = createTestHarness(topic); - testHarness.initializeState( - checkpoint0); // recover state 0 - producerA recover and commit txn 0 - testHarness.close(); - - assertExactlyOnceForTopic(createProperties(), topic, Arrays.asList(42)); - - deleteTestTopic(topic); - checkProducerLeak(); - } - - @Test - public void testRunOutOfProducersInThePool() throws Exception { - String topic = "flink-kafka-run-out-of-producers"; - - try (OneInputStreamOperatorTestHarness testHarness = - createTestHarness(topic)) { - - testHarness.setup(); - testHarness.open(); - - for (int i = 0; i < FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE * 2; i++) { - testHarness.processElement(i, i * 2); - testHarness.snapshot(i, i * 2 + 1); - } - } catch (Exception ex) { - if (!ex.getCause().getMessage().startsWith("Too many ongoing")) { - throw ex; - } - } - deleteTestTopic(topic); - checkProducerLeak(); - } - - @Test - public void testMigrateFromAtLeastOnceToExactlyOnce() throws Exception { - String topic = "testMigrateFromAtLeastOnceToExactlyOnce"; - testRecoverWithChangeSemantics( - topic, - FlinkKafkaProducer.Semantic.AT_LEAST_ONCE, - FlinkKafkaProducer.Semantic.EXACTLY_ONCE); - assertExactlyOnceForTopic(createProperties(), topic, Arrays.asList(42, 43, 44, 45)); - deleteTestTopic(topic); - } - - @Test - public void testMigrateFromAtExactlyOnceToAtLeastOnce() throws Exception { - String topic = "testMigrateFromExactlyOnceToAtLeastOnce"; - testRecoverWithChangeSemantics( - topic, - FlinkKafkaProducer.Semantic.EXACTLY_ONCE, - FlinkKafkaProducer.Semantic.AT_LEAST_ONCE); - assertExactlyOnceForTopic(createProperties(), topic, Arrays.asList(42, 43, 45, 46, 47)); - deleteTestTopic(topic); - } - - @Test - public void testDefaultTransactionalIdPrefix() throws Exception { - Properties properties = createProperties(); - String topic = "testCustomizeTransactionalIdPrefix"; - FlinkKafkaProducer kafkaProducer = - new FlinkKafkaProducer<>( - topic, - integerKeyedSerializationSchema, - properties, - FlinkKafkaProducer.Semantic.EXACTLY_ONCE); - - final String taskName = "MyTask"; - final OperatorID operatorID = new OperatorID(); - - String transactionalIdUsed; - try (OneInputStreamOperatorTestHarness testHarness = - new OneInputStreamOperatorTestHarness<>( - new StreamSink<>(kafkaProducer), - IntSerializer.INSTANCE, - taskName, - operatorID)) { - testHarness.setup(); - testHarness.open(); - testHarness.processElement(2, 0); - testHarness.snapshot(0, 1); - - transactionalIdUsed = kafkaProducer.getTransactionalId(); - } - - deleteTestTopic(topic); - checkProducerLeak(); - - assertThat(transactionalIdUsed).isNotNull(); - String expectedTransactionalIdPrefix = taskName + "-" + operatorID.toHexString(); - 
assertThat(transactionalIdUsed).startsWith(expectedTransactionalIdPrefix); - } - - @Test - public void testCustomizeTransactionalIdPrefix() throws Exception { - String transactionalIdPrefix = "my-prefix"; - - Properties properties = createProperties(); - String topic = "testCustomizeTransactionalIdPrefix"; - FlinkKafkaProducer kafkaProducer = - new FlinkKafkaProducer<>( - topic, - integerKeyedSerializationSchema, - properties, - FlinkKafkaProducer.Semantic.EXACTLY_ONCE); - kafkaProducer.setTransactionalIdPrefix(transactionalIdPrefix); - - String transactionalIdUsed; - try (OneInputStreamOperatorTestHarness testHarness = - new OneInputStreamOperatorTestHarness<>( - new StreamSink<>(kafkaProducer), IntSerializer.INSTANCE)) { - testHarness.setup(); - testHarness.open(); - testHarness.processElement(1, 0); - testHarness.snapshot(0, 1); - - transactionalIdUsed = kafkaProducer.getTransactionalId(); - } - - deleteTestTopic(topic); - checkProducerLeak(); - - assertThat(transactionalIdUsed).isNotNull(); - assertThat(transactionalIdUsed).startsWith(transactionalIdPrefix); - } - - @Test - public void testRestoreUsingDifferentTransactionalIdPrefix() throws Exception { - String topic = "testCustomizeTransactionalIdPrefix"; - Properties properties = createProperties(); - - final String transactionalIdPrefix1 = "my-prefix1"; - FlinkKafkaProducer kafkaProducer1 = - new FlinkKafkaProducer<>( - topic, - integerKeyedSerializationSchema, - properties, - FlinkKafkaProducer.Semantic.EXACTLY_ONCE); - kafkaProducer1.setTransactionalIdPrefix(transactionalIdPrefix1); - OperatorSubtaskState snapshot; - try (OneInputStreamOperatorTestHarness testHarness1 = - new OneInputStreamOperatorTestHarness<>( - new StreamSink<>(kafkaProducer1), IntSerializer.INSTANCE)) { - testHarness1.setup(); - testHarness1.open(); - testHarness1.processElement(42, 0); - snapshot = testHarness1.snapshot(0, 1); - - testHarness1.processElement(43, 2); - } - - final String transactionalIdPrefix2 = "my-prefix2"; - FlinkKafkaProducer kafkaProducer2 = - new FlinkKafkaProducer<>( - topic, - integerKeyedSerializationSchema, - properties, - FlinkKafkaProducer.Semantic.EXACTLY_ONCE); - kafkaProducer2.setTransactionalIdPrefix(transactionalIdPrefix2); - try (OneInputStreamOperatorTestHarness testHarness2 = - new OneInputStreamOperatorTestHarness<>( - new StreamSink<>(kafkaProducer2), IntSerializer.INSTANCE)) { - testHarness2.setup(); - // restore from the previous snapshot, transactions with records 43 should be aborted - testHarness2.initializeState(snapshot); - testHarness2.open(); - - testHarness2.processElement(44, 3); - testHarness2.snapshot(1, 4); - testHarness2.processElement(45, 5); - testHarness2.notifyOfCompletedCheckpoint(1); - testHarness2.processElement(46, 6); - } - - assertExactlyOnceForTopic(createProperties(), topic, Arrays.asList(42, 44)); - checkProducerLeak(); - } - - private void testRecoverWithChangeSemantics( - String topic, - FlinkKafkaProducer.Semantic fromSemantic, - FlinkKafkaProducer.Semantic toSemantic) - throws Exception { - OperatorSubtaskState producerSnapshot; - try (OneInputStreamOperatorTestHarness testHarness = - createTestHarness(topic, fromSemantic)) { - testHarness.setup(); - testHarness.open(); - testHarness.processElement(42, 0); - testHarness.snapshot(0, 1); - testHarness.processElement(43, 2); - testHarness.notifyOfCompletedCheckpoint(0); - producerSnapshot = testHarness.snapshot(1, 3); - testHarness.processElement(44, 4); - } - - try (OneInputStreamOperatorTestHarness testHarness = - createTestHarness(topic, 
toSemantic)) { - testHarness.setup(); - testHarness.initializeState(producerSnapshot); - testHarness.open(); - testHarness.processElement(45, 7); - testHarness.snapshot(2, 8); - testHarness.processElement(46, 9); - testHarness.notifyOfCompletedCheckpoint(2); - testHarness.processElement(47, 9); - } - checkProducerLeak(); - } - - // ----------------------------------------------------------------------------------------------------------------- - - // shut down a Kafka broker - private void failBroker(int brokerId) throws Exception { - kafkaServer.stopBroker(brokerId); - } - - private void closeIgnoringProducerFenced(AutoCloseable autoCloseable) throws Exception { - try { - autoCloseable.close(); - } catch (Exception ex) { - if (!(ex.getCause() instanceof ProducerFencedException)) { - throw ex; - } - } - } - - private OneInputStreamOperatorTestHarness createTestHarness(String topic) - throws Exception { - return createTestHarness(topic, FlinkKafkaProducer.Semantic.EXACTLY_ONCE); - } - - private OneInputStreamOperatorTestHarness createTestHarness( - String topic, FlinkKafkaProducer.Semantic semantic) throws Exception { - return createTestHarness(topic, 1, 1, 0, semantic); - } - - private OneInputStreamOperatorTestHarness createTestHarness( - String topic, - int maxParallelism, - int parallelism, - int subtaskIndex, - FlinkKafkaProducer.Semantic semantic) - throws Exception { - Properties properties = createProperties(); - - FlinkKafkaProducer kafkaProducer = - new FlinkKafkaProducer<>( - topic, integerKeyedSerializationSchema, properties, semantic); - - return new OneInputStreamOperatorTestHarness<>( - new StreamSink<>(kafkaProducer), - maxParallelism, - parallelism, - subtaskIndex, - IntSerializer.INSTANCE, - new OperatorID(42, 44)); - } - - private Properties createProperties() { - Properties properties = new Properties(); - properties.putAll(standardProps); - properties.putAll(secureProps); - properties.put(FlinkKafkaProducer.KEY_DISABLE_METRICS, "true"); - return properties; - } - - private boolean isCausedBy(FlinkKafkaErrorCode expectedErrorCode, Throwable ex) { - Optional cause = findThrowable(ex, FlinkKafkaException.class); - if (cause.isPresent()) { - return cause.get().getErrorCode().equals(expectedErrorCode); - } - return false; - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducerMigrationTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducerMigrationTest.java deleted file mode 100644 index 32c380c70..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducerMigrationTest.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.FlinkVersion; -import org.apache.flink.api.common.typeutils.base.IntSerializer; -import org.apache.flink.runtime.jobgraph.OperatorID; -import org.apache.flink.streaming.api.operators.StreamSink; -import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness; - -import org.apache.kafka.clients.producer.ProducerConfig; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -import java.util.Collection; -import java.util.Properties; - -/** - * Tests for checking whether {@link FlinkKafkaProducer} can restore from snapshots that were done - * using previous Flink versions' {@link FlinkKafkaProducer}. - * - *
    For regenerating the binary snapshot files run {@link #writeSnapshot()} on the corresponding - * Flink release-* branch. - */ -@RunWith(Parameterized.class) -public class FlinkKafkaProducerMigrationTest extends KafkaMigrationTestBase { - @Parameterized.Parameters(name = "Migration Savepoint: {0}") - public static Collection parameters() { - return FlinkVersion.rangeOf(FlinkVersion.v1_8, FlinkVersion.current()); - } - - public FlinkKafkaProducerMigrationTest(FlinkVersion testMigrateVersion) { - super(testMigrateVersion); - } - - @Override - protected Properties createProperties() { - Properties properties = new Properties(); - properties.putAll(standardProps); - properties.putAll(secureProps); - properties.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, "producer-transaction-id"); - properties.put(FlinkKafkaProducer.KEY_DISABLE_METRICS, "true"); - return properties; - } - - @Override - protected OneInputStreamOperatorTestHarness createTestHarness() - throws Exception { - FlinkKafkaProducer kafkaProducer = - new FlinkKafkaProducer<>( - TOPIC, - integerKeyedSerializationSchema, - createProperties(), - FlinkKafkaProducer.Semantic.EXACTLY_ONCE) - .ignoreFailuresAfterTransactionTimeout(); - - return new OneInputStreamOperatorTestHarness<>( - new StreamSink<>(kafkaProducer), - 1, - 1, - 0, - IntSerializer.INSTANCE, - new OperatorID(1, 1)); - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducerStateSerializerTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducerStateSerializerTest.java deleted file mode 100644 index 26d43f147..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducerStateSerializerTest.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.api.common.typeutils.SerializerTestBase; -import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.streaming.api.functions.sink.TwoPhaseCommitSinkFunction; -import org.apache.flink.streaming.api.functions.sink.TwoPhaseCommitSinkFunction.TransactionHolder; - -import java.util.Collections; -import java.util.Optional; - -/** A test for the {@link TypeSerializer TypeSerializers} used for the Kafka producer state. 
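What a SerializerTestBase subclass such as the one below ultimately verifies is a write/read round trip through Flink's memory views. A standalone illustration of that contract with a simple built-in serializer (IntSerializer, not the producer-state serializer under test):

    import org.apache.flink.api.common.typeutils.base.IntSerializer;
    import org.apache.flink.core.memory.DataInputDeserializer;
    import org.apache.flink.core.memory.DataOutputSerializer;

    public class SerializerRoundTripSketch {
        public static void main(String[] args) throws Exception {
            IntSerializer serializer = IntSerializer.INSTANCE;

            DataOutputSerializer out = new DataOutputSerializer(64);
            serializer.serialize(42, out);                       // write

            DataInputDeserializer in = new DataInputDeserializer(out.getCopyOfBuffer());
            int roundTripped = serializer.deserialize(in);       // read back

            if (roundTripped != 42) {
                throw new AssertionError("round trip failed");
            }
        }
    }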
*/ -class FlinkKafkaProducerStateSerializerTest - extends SerializerTestBase< - TwoPhaseCommitSinkFunction.State< - FlinkKafkaProducer.KafkaTransactionState, - FlinkKafkaProducer.KafkaTransactionContext>> { - - @Override - protected TypeSerializer< - TwoPhaseCommitSinkFunction.State< - FlinkKafkaProducer.KafkaTransactionState, - FlinkKafkaProducer.KafkaTransactionContext>> - createSerializer() { - return new TwoPhaseCommitSinkFunction.StateSerializer<>( - new FlinkKafkaProducer.TransactionStateSerializer(), - new FlinkKafkaProducer.ContextStateSerializer()); - } - - @Override - protected Class< - TwoPhaseCommitSinkFunction.State< - FlinkKafkaProducer.KafkaTransactionState, - FlinkKafkaProducer.KafkaTransactionContext>> - getTypeClass() { - return (Class) TwoPhaseCommitSinkFunction.State.class; - } - - @Override - protected int getLength() { - return -1; - } - - @Override - protected TwoPhaseCommitSinkFunction.State< - FlinkKafkaProducer.KafkaTransactionState, - FlinkKafkaProducer.KafkaTransactionContext> - [] getTestData() { - //noinspection unchecked - return new TwoPhaseCommitSinkFunction.State[] { - new TwoPhaseCommitSinkFunction.State< - FlinkKafkaProducer.KafkaTransactionState, - FlinkKafkaProducer.KafkaTransactionContext>( - new TransactionHolder( - new FlinkKafkaProducer.KafkaTransactionState( - "fake", 1L, (short) 42, null), - 0), - Collections.emptyList(), - Optional.empty()), - new TwoPhaseCommitSinkFunction.State< - FlinkKafkaProducer.KafkaTransactionState, - FlinkKafkaProducer.KafkaTransactionContext>( - new TransactionHolder( - new FlinkKafkaProducer.KafkaTransactionState( - "fake", 1L, (short) 42, null), - 2711), - Collections.singletonList( - new TransactionHolder( - new FlinkKafkaProducer.KafkaTransactionState( - "fake", 1L, (short) 42, null), - 42)), - Optional.empty()), - new TwoPhaseCommitSinkFunction.State< - FlinkKafkaProducer.KafkaTransactionState, - FlinkKafkaProducer.KafkaTransactionContext>( - new TransactionHolder( - new FlinkKafkaProducer.KafkaTransactionState( - "fake", 1L, (short) 42, null), - 0), - Collections.emptyList(), - Optional.of( - new FlinkKafkaProducer.KafkaTransactionContext( - Collections.emptySet()))), - new TwoPhaseCommitSinkFunction.State< - FlinkKafkaProducer.KafkaTransactionState, - FlinkKafkaProducer.KafkaTransactionContext>( - new TransactionHolder( - new FlinkKafkaProducer.KafkaTransactionState( - "fake", 1L, (short) 42, null), - 0), - Collections.emptyList(), - Optional.of( - new FlinkKafkaProducer.KafkaTransactionContext( - Collections.singleton("hello")))), - new TwoPhaseCommitSinkFunction.State< - FlinkKafkaProducer.KafkaTransactionState, - FlinkKafkaProducer.KafkaTransactionContext>( - new TransactionHolder( - new FlinkKafkaProducer.KafkaTransactionState( - "fake", 1L, (short) 42, null), - 0), - Collections.singletonList( - new TransactionHolder( - new FlinkKafkaProducer.KafkaTransactionState( - "fake", 1L, (short) 42, null), - 0)), - Optional.of( - new FlinkKafkaProducer.KafkaTransactionContext( - Collections.emptySet()))), - new TwoPhaseCommitSinkFunction.State< - FlinkKafkaProducer.KafkaTransactionState, - FlinkKafkaProducer.KafkaTransactionContext>( - new TransactionHolder( - new FlinkKafkaProducer.KafkaTransactionState( - "fake", 1L, (short) 42, null), - 0), - Collections.singletonList( - new TransactionHolder( - new FlinkKafkaProducer.KafkaTransactionState( - "fake", 1L, (short) 42, null), - 0)), - Optional.of( - new FlinkKafkaProducer.KafkaTransactionContext( - Collections.singleton("hello")))) - }; - } - - @Override - public 
void testInstantiate() { - // this serializer does not support instantiation - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducerTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducerTest.java deleted file mode 100644 index 6fedcc43c..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/FlinkKafkaProducerTest.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.api.common.serialization.SerializationSchema; -import org.apache.flink.api.common.typeutils.base.IntSerializer; -import org.apache.flink.runtime.jobgraph.OperatorID; -import org.apache.flink.streaming.api.operators.StreamSink; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; -import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness; - -import org.apache.kafka.clients.producer.ProducerRecord; -import org.junit.Test; - -import javax.annotation.Nullable; - -import java.util.Optional; -import java.util.Properties; - -import static org.assertj.core.api.Assertions.assertThat; - -/** Tests for {@link FlinkKafkaProducer}. 
*/ -public class FlinkKafkaProducerTest { - @Test - public void testOpenSerializationSchemaProducer() throws Exception { - OpenTestingSerializationSchema schema = new OpenTestingSerializationSchema(); - FlinkKafkaProducer kafkaProducer = - new FlinkKafkaProducer<>("localhost:9092", "test-topic", schema); - - OneInputStreamOperatorTestHarness testHarness = - new OneInputStreamOperatorTestHarness<>( - new StreamSink<>(kafkaProducer), - 1, - 1, - 0, - IntSerializer.INSTANCE, - new OperatorID(1, 1)); - - testHarness.open(); - - assertThat(schema.openCalled).isTrue(); - } - - @Test - public void testOpenKafkaSerializationSchemaProducer() throws Exception { - OpenTestingKafkaSerializationSchema schema = new OpenTestingKafkaSerializationSchema(); - Properties properties = new Properties(); - properties.put("bootstrap.servers", "localhost:9092"); - FlinkKafkaProducer kafkaProducer = - new FlinkKafkaProducer<>( - "test-topic", - schema, - properties, - FlinkKafkaProducer.Semantic.AT_LEAST_ONCE); - - OneInputStreamOperatorTestHarness testHarness = - new OneInputStreamOperatorTestHarness<>( - new StreamSink<>(kafkaProducer), - 1, - 1, - 0, - IntSerializer.INSTANCE, - new OperatorID(1, 1)); - - testHarness.open(); - - assertThat(schema.openCalled).isTrue(); - } - - @Test - public void testOpenKafkaCustomPartitioner() throws Exception { - CustomPartitioner partitioner = new CustomPartitioner<>(); - Properties properties = new Properties(); - properties.put("bootstrap.servers", "localhost:9092"); - FlinkKafkaProducer kafkaProducer = - new FlinkKafkaProducer<>( - "test-topic", - new OpenTestingSerializationSchema(), - properties, - Optional.of(partitioner)); - - OneInputStreamOperatorTestHarness testHarness = - new OneInputStreamOperatorTestHarness<>( - new StreamSink<>(kafkaProducer), - 1, - 1, - 0, - IntSerializer.INSTANCE, - new OperatorID(1, 1)); - - testHarness.open(); - - assertThat(partitioner.openCalled).isTrue(); - } - - @Test(expected = NullPointerException.class) - public void testProvidedNullTransactionalIdPrefix() { - FlinkKafkaProducer kafkaProducer = - new FlinkKafkaProducer<>( - "localhost:9092", "test-topic", new OpenTestingSerializationSchema()); - kafkaProducer.setTransactionalIdPrefix(null); - } - - private static class CustomPartitioner extends FlinkKafkaPartitioner { - private boolean openCalled; - - @Override - public void open(int parallelInstanceId, int parallelInstances) { - super.open(parallelInstanceId, parallelInstances); - openCalled = true; - } - - @Override - public int partition( - T record, byte[] key, byte[] value, String targetTopic, int[] partitions) { - return 0; - } - } - - private static class OpenTestingKafkaSerializationSchema - implements KafkaSerializationSchema { - private boolean openCalled; - - @Override - public void open(SerializationSchema.InitializationContext context) throws Exception { - openCalled = true; - } - - @Override - public ProducerRecord serialize(Integer element, @Nullable Long timestamp) { - return null; - } - } - - private static class OpenTestingSerializationSchema implements SerializationSchema { - private boolean openCalled; - - @Override - public void open(SerializationSchema.InitializationContext context) throws Exception { - openCalled = true; - } - - @Override - public byte[] serialize(Integer element) { - return new byte[0]; - } - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaConsumerTestBase.java 
b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaConsumerTestBase.java deleted file mode 100644 index 9a9acdea0..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaConsumerTestBase.java +++ /dev/null @@ -1,2734 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.api.common.ExecutionConfig; -import org.apache.flink.api.common.JobID; -import org.apache.flink.api.common.JobStatus; -import org.apache.flink.api.common.eventtime.WatermarkStrategy; -import org.apache.flink.api.common.functions.FlatMapFunction; -import org.apache.flink.api.common.functions.RichFlatMapFunction; -import org.apache.flink.api.common.functions.RichMapFunction; -import org.apache.flink.api.common.restartstrategy.RestartStrategies; -import org.apache.flink.api.common.serialization.DeserializationSchema; -import org.apache.flink.api.common.serialization.SerializationSchema; -import org.apache.flink.api.common.serialization.SimpleStringSchema; -import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema; -import org.apache.flink.api.common.state.CheckpointListener; -import org.apache.flink.api.common.typeinfo.BasicTypeInfo; -import org.apache.flink.api.common.typeinfo.TypeHint; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.api.java.tuple.Tuple1; -import org.apache.flink.api.java.tuple.Tuple2; -import org.apache.flink.api.java.tuple.Tuple3; -import org.apache.flink.client.program.ClusterClient; -import org.apache.flink.client.program.ProgramInvocationException; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.connector.kafka.source.KafkaSource; -import org.apache.flink.connector.kafka.source.KafkaSourceBuilder; -import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer; -import org.apache.flink.core.memory.DataInputView; -import org.apache.flink.core.memory.DataInputViewStreamWrapper; -import org.apache.flink.core.memory.DataOutputView; -import org.apache.flink.core.memory.DataOutputViewStreamWrapper; -import org.apache.flink.runtime.client.JobCancellationException; -import org.apache.flink.runtime.client.JobExecutionException; -import org.apache.flink.runtime.jobgraph.JobGraph; -import org.apache.flink.streaming.api.checkpoint.ListCheckpointed; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.datastream.DataStreamSource; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import 
org.apache.flink.streaming.api.functions.KeyedProcessFunction; -import org.apache.flink.streaming.api.functions.sink.DiscardingSink; -import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; -import org.apache.flink.streaming.api.functions.sink.SinkFunction; -import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; -import org.apache.flink.streaming.api.functions.source.RichSourceFunction; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor; -import org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator; -import org.apache.flink.streaming.connectors.kafka.config.StartupMode; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaDeserializationSchemaWrapper; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; -import org.apache.flink.streaming.connectors.kafka.testutils.DataGenerators; -import org.apache.flink.streaming.connectors.kafka.testutils.FailingIdentityMapper; -import org.apache.flink.streaming.connectors.kafka.testutils.KafkaUtils; -import org.apache.flink.streaming.connectors.kafka.testutils.PartitionValidatingMapper; -import org.apache.flink.streaming.connectors.kafka.testutils.ThrottledMapper; -import org.apache.flink.streaming.connectors.kafka.testutils.Tuple2FlinkPartitioner; -import org.apache.flink.streaming.connectors.kafka.testutils.ValidatingExactlyOnceSink; -import org.apache.flink.streaming.util.serialization.KeyedSerializationSchema; -import org.apache.flink.streaming.util.serialization.TypeInformationKeyValueSerializationSchema; -import org.apache.flink.test.util.SuccessException; -import org.apache.flink.testutils.junit.RetryOnException; -import org.apache.flink.util.Collector; -import org.apache.flink.util.ExceptionUtils; - -import org.apache.commons.io.output.ByteArrayOutputStream; -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.apache.kafka.clients.consumer.OffsetResetStrategy; -import org.apache.kafka.clients.producer.ProducerConfig; -import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.kafka.common.TopicPartition; -import org.apache.kafka.common.errors.NotLeaderForPartitionException; -import org.apache.kafka.common.errors.TimeoutException; -import org.junit.Before; - -import javax.annotation.Nullable; -import javax.management.MBeanServer; -import javax.management.ObjectName; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.lang.management.ManagementFactory; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Properties; -import java.util.Random; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicReference; - -import static org.apache.flink.streaming.connectors.kafka.testutils.ClusterCommunicationUtils.getRunningJobs; -import static org.apache.flink.streaming.connectors.kafka.testutils.ClusterCommunicationUtils.waitUntilJobIsRunning; -import static org.apache.flink.streaming.connectors.kafka.testutils.ClusterCommunicationUtils.waitUntilNoJobIsRunning; -import static org.apache.flink.test.util.TestUtils.submitJobAndWaitForResult; -import static org.apache.flink.test.util.TestUtils.tryExecute; -import static org.assertj.core.api.Assertions.assertThat; 
-import static org.assertj.core.api.Assertions.fail; - -/** Abstract test base for all Kafka consumer tests. */ -@SuppressWarnings("serial") -public abstract class KafkaConsumerTestBase extends KafkaTestBaseWithFlink { - protected final boolean useNewSource; - - private ClusterClient client; - - protected KafkaConsumerTestBase() { - this(false); - } - - protected KafkaConsumerTestBase(boolean useNewSource) { - this.useNewSource = useNewSource; - } - - // ------------------------------------------------------------------------ - // Common Test Preparation - // ------------------------------------------------------------------------ - - /** - * Makes sure that no job is on the JobManager any more from any previous tests that use the - * same mini cluster. Otherwise, missing slots may happen. - */ - @Before - public void setClientAndEnsureNoJobIsLingering() throws Exception { - client = flink.getClusterClient(); - waitUntilNoJobIsRunning(client); - } - - // ------------------------------------------------------------------------ - // Suite of Tests - // - // The tests here are all not activated (by an @Test tag), but need - // to be invoked from the extending classes. That way, the classes can - // select which tests to run. - // ------------------------------------------------------------------------ - - /** - * Test that ensures the KafkaConsumer is properly failing if the topic doesn't exist and a - * wrong broker was specified. - * - * @throws Exception - */ - public void runFailOnNoBrokerTest() throws Exception { - try { - Properties properties = new Properties(); - - StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment(); - see.setRestartStrategy(RestartStrategies.noRestart()); - see.setParallelism(1); - - // use wrong ports for the consumers - properties.setProperty("bootstrap.servers", "localhost:80"); - properties.setProperty("group.id", "test"); - properties.setProperty("request.timeout.ms", "3000"); // let the test fail fast - properties.setProperty("socket.timeout.ms", "3000"); - properties.setProperty("session.timeout.ms", "2000"); - properties.setProperty("fetch.max.wait.ms", "2000"); - properties.setProperty("heartbeat.interval.ms", "1000"); - properties.putAll(secureProps); - DataStream stream = - getStream(see, "doesntexist", new SimpleStringSchema(), properties); - stream.print(); - see.execute("No broker test"); - } catch (JobExecutionException jee) { - final Optional optionalTimeoutException = - ExceptionUtils.findThrowable(jee, TimeoutException.class); - assertThat(optionalTimeoutException).isPresent(); - - final TimeoutException timeoutException = optionalTimeoutException.get(); - if (useNewSource) { - assertThat(timeoutException) - .hasMessageContaining("Timed out waiting for a node assignment."); - } else { - assertThat(timeoutException) - .hasMessage("Timeout expired while fetching topic metadata"); - } - } - } - - /** - * Ensures that the committed offsets to Kafka are the offsets of "the next record to process". 
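Kafka's committed offset is exclusive: once offsets 0-49 of a partition are fully processed, the value handed back to the broker is 50, i.e. the next record to read, which is exactly the number the test below polls for. A small sketch of that arithmetic with the plain consumer API (broker address, group id, topic and partition are placeholders):

    import java.util.Collections;
    import java.util.Properties;

    import org.apache.kafka.clients.consumer.ConsumerConfig;
    import org.apache.kafka.clients.consumer.KafkaConsumer;
    import org.apache.kafka.clients.consumer.OffsetAndMetadata;
    import org.apache.kafka.common.TopicPartition;
    import org.apache.kafka.common.serialization.StringDeserializer;

    public class NextOffsetCommitSketch {
        public static void main(String[] args) {
            Properties props = new Properties();
            props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumed broker
            props.put(ConsumerConfig.GROUP_ID_CONFIG, "test");                    // assumed group
            props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
            props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());

            long lastProcessedOffset = 49L;                // records 0-49 fully processed
            long offsetToCommit = lastProcessedOffset + 1; // 50 = "the next record to process"

            try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
                TopicPartition partition = new TopicPartition("some-topic", 0); // assumed partition
                consumer.assign(Collections.singletonList(partition));
                consumer.commitSync(
                        Collections.singletonMap(partition, new OffsetAndMetadata(offsetToCommit)));
            }
        }
    }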
- */ - public void runCommitOffsetsToKafka() throws Exception { - // 3 partitions with 50 records each (0-49, so the expected commit offset of each partition - // should be 50) - final int parallelism = 3; - final int recordsInEachPartition = 50; - - final String topicName = - writeSequence( - "testCommitOffsetsToKafkaTopic", recordsInEachPartition, parallelism, 1); - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); - env.setParallelism(parallelism); - env.enableCheckpointing(200); - - DataStream stream = - getStream(env, topicName, new SimpleStringSchema(), standardProps); - stream.addSink(new DiscardingSink()); - - final AtomicReference errorRef = new AtomicReference<>(); - final Thread runner = - new Thread("runner") { - @Override - public void run() { - try { - env.execute(); - } catch (Throwable t) { - if (!(t instanceof JobCancellationException)) { - errorRef.set(t); - } - } - } - }; - runner.start(); - - final Long l50 = 50L; // the final committed offset in Kafka should be 50 - final long deadline = 30_000_000_000L + System.nanoTime(); - - KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = - kafkaServer.createOffsetHandler(); - - do { - Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0); - Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1); - Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2); - - if (l50.equals(o1) && l50.equals(o2) && l50.equals(o3)) { - break; - } - - Thread.sleep(100); - } while (System.nanoTime() < deadline); - - // cancel the job & wait for the job to finish - final Iterator it = getRunningJobs(client).iterator(); - final JobID jobId = it.next(); - client.cancel(jobId).get(); - assertThat(it.hasNext()).isFalse(); - runner.join(); - - final Throwable t = errorRef.get(); - if (t != null) { - throw new RuntimeException("Job failed with an exception", t); - } - - // final check to see if offsets are correctly in Kafka - Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0); - Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1); - Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2); - assertThat(o1).isEqualTo(Long.valueOf(50L)); - assertThat(o2).isEqualTo(Long.valueOf(50L)); - assertThat(o3).isEqualTo(Long.valueOf(50L)); - - kafkaOffsetHandler.close(); - deleteTestTopic(topicName); - } - - /** - * This test ensures that when the consumers retrieve some start offset from kafka (earliest, - * latest), that this offset is committed to Kafka, even if some partitions are not read. - * - *
    Test: - Create 3 partitions - write 50 messages into each. - Start three consumers with - * auto.offset.reset='latest' and wait until they committed into Kafka. - Check if the offsets - * in Kafka are set to 50 for the three partitions - * - *
    See FLINK-3440 as well - */ - public void runAutoOffsetRetrievalAndCommitToKafka() throws Exception { - // 3 partitions with 50 records each (0-49, so the expected commit offset of each partition - // should be 50) - final int parallelism = 3; - final int recordsInEachPartition = 50; - - final String topicName = - writeSequence( - "testAutoOffsetRetrievalAndCommitToKafkaTopic", - recordsInEachPartition, - parallelism, - 1); - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); - env.setParallelism(parallelism); - env.enableCheckpointing(200); - - Properties readProps = new Properties(); - readProps.putAll(standardProps); - readProps.setProperty( - "auto.offset.reset", - "latest"); // set to reset to latest, so that partitions are initially not read - - DataStream stream = getStream(env, topicName, new SimpleStringSchema(), readProps); - stream.addSink(new DiscardingSink()); - - final AtomicReference errorRef = new AtomicReference<>(); - final Thread runner = - new Thread("runner") { - @Override - public void run() { - try { - env.execute(); - } catch (Throwable t) { - if (!(t instanceof JobCancellationException)) { - errorRef.set(t); - } - } - } - }; - runner.start(); - - KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = - kafkaServer.createOffsetHandler(); - - final Long l50 = 50L; // the final committed offset in Kafka should be 50 - final long deadline = 30_000_000_000L + System.nanoTime(); - do { - Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0); - Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1); - Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2); - - if (l50.equals(o1) && l50.equals(o2) && l50.equals(o3)) { - break; - } - - Thread.sleep(100); - } while (System.nanoTime() < deadline); - - // cancel the job & wait for the job to finish - final Iterator it = getRunningJobs(client).iterator(); - final JobID jobId = it.next(); - client.cancel(jobId).get(); - assertThat(it.hasNext()).isFalse(); - runner.join(); - - final Throwable t = errorRef.get(); - if (t != null) { - throw new RuntimeException("Job failed with an exception", t); - } - - // final check to see if offsets are correctly in Kafka - Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0); - Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1); - Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2); - assertThat(o1).isEqualTo(Long.valueOf(50L)); - assertThat(o2).isEqualTo(Long.valueOf(50L)); - assertThat(o3).isEqualTo(Long.valueOf(50L)); - - kafkaOffsetHandler.close(); - deleteTestTopic(topicName); - } - - /** - * This test ensures that when explicitly set to start from earliest record, the consumer - * ignores the "auto.offset.reset" behaviour as well as any committed group offsets in Kafka. 
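The same start-position semantics exist in the current KafkaSource API, which this test base also exercises when useNewSource is set: an explicit OffsetsInitializer.earliest() makes the source start from the beginning of each partition regardless of committed group offsets or the auto.offset.reset property. A minimal sketch under those assumptions (bootstrap address and group id are placeholders; the topic name is the one used by the test):

    import org.apache.flink.api.common.eventtime.WatermarkStrategy;
    import org.apache.flink.api.common.serialization.SimpleStringSchema;
    import org.apache.flink.connector.kafka.source.KafkaSource;
    import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

    public class StartFromEarliestSketch {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

            KafkaSource<String> source = KafkaSource.<String>builder()
                    .setBootstrapServers("localhost:9092")           // assumed broker
                    .setTopics("testStartFromEarliestOffsetsTopic")  // topic name from the test
                    .setGroupId("test")                              // assumed group
                    // explicit initializer: committed offsets and auto.offset.reset do not apply
                    .setStartingOffsets(OffsetsInitializer.earliest())
                    .setValueOnlyDeserializer(new SimpleStringSchema())
                    .build();

            env.fromSource(source, WatermarkStrategy.noWatermarks(), "KafkaSource").print();
            env.execute("start-from-earliest sketch");
        }
    }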
- */ - public void runStartFromEarliestOffsets() throws Exception { - // 3 partitions with 50 records each (0-49, so the expected commit offset of each partition - // should be 50) - final int parallelism = 3; - final int recordsInEachPartition = 50; - - final String topicName = - writeSequence( - "testStartFromEarliestOffsetsTopic", - recordsInEachPartition, - parallelism, - 1); - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(parallelism); - - Properties readProps = new Properties(); - readProps.putAll(standardProps); - readProps.setProperty("auto.offset.reset", "latest"); // this should be ignored - - // the committed offsets should be ignored - KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = - kafkaServer.createOffsetHandler(); - kafkaOffsetHandler.setCommittedOffset(topicName, 0, 23); - kafkaOffsetHandler.setCommittedOffset(topicName, 1, 31); - kafkaOffsetHandler.setCommittedOffset(topicName, 2, 43); - - readSequence( - env, - StartupMode.EARLIEST, - null, - null, - readProps, - parallelism, - topicName, - recordsInEachPartition, - 0); - - kafkaOffsetHandler.close(); - deleteTestTopic(topicName); - } - - /** - * This test ensures that when explicitly set to start from latest record, the consumer ignores - * the "auto.offset.reset" behaviour as well as any committed group offsets in Kafka. - */ - public void runStartFromLatestOffsets() throws Exception { - // 50 records written to each of 3 partitions before launching a latest-starting consuming - // job - final int parallelism = 3; - final int recordsInEachPartition = 50; - - // each partition will be written an extra 200 records - final int extraRecordsInEachPartition = 200; - - // all already existing data in the topic, before the consuming topology has started, should - // be ignored - final String topicName = - writeSequence( - "testStartFromLatestOffsetsTopic", recordsInEachPartition, parallelism, 1); - - // the committed offsets should be ignored - KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = - kafkaServer.createOffsetHandler(); - kafkaOffsetHandler.setCommittedOffset(topicName, 0, 23); - kafkaOffsetHandler.setCommittedOffset(topicName, 1, 31); - kafkaOffsetHandler.setCommittedOffset(topicName, 2, 43); - - // job names for the topologies for writing and consuming the extra records - final String consumeExtraRecordsJobName = "Consume Extra Records Job"; - final String writeExtraRecordsJobName = "Write Extra Records Job"; - - // serialization / deserialization schemas for writing and consuming the extra records - final TypeInformation> resultType = - TypeInformation.of(new TypeHint>() {}); - - final SerializationSchema> serSchema = - new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()); - - final KafkaDeserializationSchema> deserSchema = - new KafkaDeserializationSchemaWrapper<>( - new TypeInformationSerializationSchema<>( - resultType, new ExecutionConfig())); - - // setup and run the latest-consuming job - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(parallelism); - - final Properties readProps = new Properties(); - readProps.putAll(standardProps); - readProps.setProperty("auto.offset.reset", "earliest"); // this should be ignored - - DataStreamSource> stream; - if (useNewSource) { - KafkaSource> source = - kafkaServer - .getSourceBuilder(topicName, deserSchema, readProps) - .setStartingOffsets(OffsetsInitializer.latest()) - .build(); - 
stream = env.fromSource(source, WatermarkStrategy.noWatermarks(), "KafkaSource"); - } else { - FlinkKafkaConsumerBase> latestReadingConsumer = - kafkaServer.getConsumer(topicName, deserSchema, readProps); - latestReadingConsumer.setStartFromLatest(); - stream = env.addSource(latestReadingConsumer); - } - - stream.setParallelism(parallelism) - .flatMap( - new FlatMapFunction, Object>() { - @Override - public void flatMap( - Tuple2 value, Collector out) - throws Exception { - if (value.f1 - recordsInEachPartition < 0) { - throw new RuntimeException( - "test failed; consumed a record that was previously written: " - + value); - } - } - }) - .setParallelism(1) - .addSink(new DiscardingSink<>()); - - JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph()); - final JobID consumeJobId = jobGraph.getJobID(); - - final AtomicReference error = new AtomicReference<>(); - Thread consumeThread = - new Thread( - () -> { - try { - submitJobAndWaitForResult( - client, jobGraph, getClass().getClassLoader()); - } catch (Throwable t) { - if (!ExceptionUtils.findThrowable(t, JobCancellationException.class) - .isPresent()) { - error.set(t); - } - } - }); - consumeThread.start(); - - // wait until the consuming job has started, to be extra safe - waitUntilJobIsRunning(client); - - // setup the extra records writing job - final StreamExecutionEnvironment env2 = - StreamExecutionEnvironment.getExecutionEnvironment(); - - env2.setParallelism(parallelism); - - DataStream> extraRecordsStream = - env2.addSource( - new RichParallelSourceFunction>() { - - private boolean running = true; - - @Override - public void run(SourceContext> ctx) - throws Exception { - int count = - recordsInEachPartition; // the extra records should start - // from the last written value - int partition = getRuntimeContext().getIndexOfThisSubtask(); - - while (running - && count - < recordsInEachPartition - + extraRecordsInEachPartition) { - ctx.collect(new Tuple2<>(partition, count)); - count++; - } - } - - @Override - public void cancel() { - running = false; - } - }); - - kafkaServer.produceIntoKafka(extraRecordsStream, topicName, serSchema, readProps, null); - - try { - env2.execute(writeExtraRecordsJobName); - } catch (Exception e) { - throw new RuntimeException("Writing extra records failed", e); - } - - // cancel the consume job after all extra records are written - client.cancel(consumeJobId).get(); - consumeThread.join(); - - kafkaOffsetHandler.close(); - deleteTestTopic(topicName); - - // check whether the consuming thread threw any test errors; - // test will fail here if the consume job had incorrectly read any records other than the - // extra records - final Throwable consumerError = error.get(); - if (consumerError != null) { - throw new Exception("Exception in the consuming thread", consumerError); - } - } - - /** - * This test ensures that the consumer correctly uses group offsets in Kafka, and defaults to - * "auto.offset.reset" behaviour when necessary, when explicitly configured to start from group - * offsets. - * - *

-     * The partitions and their committed group offsets are set up as:
-     *   partition 0 --> committed offset 23
-     *   partition 1 --> no commit offset
-     *   partition 2 --> committed offset 43
-     *

    When configured to start from group offsets, each partition should read: partition 0 --> - * start from offset 23, read to offset 49 (27 records) partition 1 --> default to - * "auto.offset.reset" (set to earliest), so start from offset 0, read to offset 49 (50 records) - * partition 2 --> start from offset 43, read to offset 49 (7 records) - */ - public void runStartFromGroupOffsets() throws Exception { - // 3 partitions with 50 records each (offsets 0-49) - final int parallelism = 3; - final int recordsInEachPartition = 50; - - final String topicName = - writeSequence( - "testStartFromGroupOffsetsTopic", recordsInEachPartition, parallelism, 1); - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(parallelism); - - Properties readProps = new Properties(); - readProps.putAll(standardProps); - readProps.setProperty("auto.offset.reset", "earliest"); - - // the committed group offsets should be used as starting points - KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = - kafkaServer.createOffsetHandler(); - - // only partitions 0 and 2 have group offsets committed - kafkaOffsetHandler.setCommittedOffset(topicName, 0, 23); - kafkaOffsetHandler.setCommittedOffset(topicName, 2, 43); - - Map> partitionsToValueCountAndStartOffsets = - new HashMap<>(); - partitionsToValueCountAndStartOffsets.put( - 0, new Tuple2<>(27, 23)); // partition 0 should read offset 23-49 - partitionsToValueCountAndStartOffsets.put( - 1, new Tuple2<>(50, 0)); // partition 1 should read offset 0-49 - partitionsToValueCountAndStartOffsets.put( - 2, new Tuple2<>(7, 43)); // partition 2 should read offset 43-49 - - readSequence( - env, - StartupMode.GROUP_OFFSETS, - null, - null, - readProps, - topicName, - partitionsToValueCountAndStartOffsets); - - kafkaOffsetHandler.close(); - deleteTestTopic(topicName); - } - - /** - * This test ensures that the consumer correctly uses user-supplied specific offsets when - * explicitly configured to start from specific offsets. For partitions which a specific offset - * can not be found for, the starting position for them should fallback to the group offsets - * behaviour. - * - *

-     * 4 partitions will have 50 records with offsets 0 to 49. The supplied specific offsets map is:
-     *   partition 0 --> start from offset 19
-     *   partition 1 --> not set
-     *   partition 2 --> start from offset 22
-     *   partition 3 --> not set
-     *   partition 4 --> start from offset 26 (this should be ignored because the partition does not exist)
-     *

-     * The partitions and their committed group offsets are set up as:
-     *   partition 0 --> committed offset 23
-     *   partition 1 --> committed offset 31
-     *   partition 2 --> committed offset 43
-     *   partition 3 --> no commit offset
-     *

    When configured to start from these specific offsets, each partition should read: - * partition 0 --> start from offset 19, read to offset 49 (31 records) partition 1 --> fallback - * to group offsets, so start from offset 31, read to offset 49 (19 records) partition 2 --> - * start from offset 22, read to offset 49 (28 records) partition 3 --> fallback to group - * offsets, but since there is no group offset for this partition, will default to - * "auto.offset.reset" (set to "earliest"), so start from offset 0, read to offset 49 (50 - * records) - */ - public void runStartFromSpecificOffsets() throws Exception { - // 4 partitions with 50 records each (offsets 0-49) - final int parallelism = 4; - final int recordsInEachPartition = 50; - - final String topicName = - writeSequence( - "testStartFromSpecificOffsetsTopic", - recordsInEachPartition, - parallelism, - 1); - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(parallelism); - - Properties readProps = new Properties(); - readProps.putAll(standardProps); - readProps.setProperty( - "auto.offset.reset", - "earliest"); // partition 3 should default back to this behaviour - - Map specificStartupOffsets = new HashMap<>(); - specificStartupOffsets.put(new KafkaTopicPartition(topicName, 0), 19L); - specificStartupOffsets.put(new KafkaTopicPartition(topicName, 2), 22L); - specificStartupOffsets.put( - new KafkaTopicPartition(topicName, 4), - 26L); // non-existing partition, should be ignored - - // only the committed offset for partition 1 should be used, because partition 1 has no - // entry in specific offset map - KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = - kafkaServer.createOffsetHandler(); - kafkaOffsetHandler.setCommittedOffset(topicName, 0, 23); - kafkaOffsetHandler.setCommittedOffset(topicName, 1, 31); - kafkaOffsetHandler.setCommittedOffset(topicName, 2, 43); - - Map> partitionsToValueCountAndStartOffsets = - new HashMap<>(); - partitionsToValueCountAndStartOffsets.put( - 0, new Tuple2<>(31, 19)); // partition 0 should read offset 19-49 - partitionsToValueCountAndStartOffsets.put( - 1, new Tuple2<>(19, 31)); // partition 1 should read offset 31-49 - partitionsToValueCountAndStartOffsets.put( - 2, new Tuple2<>(28, 22)); // partition 2 should read offset 22-49 - partitionsToValueCountAndStartOffsets.put( - 3, new Tuple2<>(50, 0)); // partition 3 should read offset 0-49 - - readSequence( - env, - StartupMode.SPECIFIC_OFFSETS, - specificStartupOffsets, - null, - readProps, - topicName, - partitionsToValueCountAndStartOffsets); - - kafkaOffsetHandler.close(); - deleteTestTopic(topicName); - } - - /** - * This test ensures that the consumer correctly uses user-supplied timestamp when explicitly - * configured to start from timestamp. - * - *

    The validated Kafka data is written in 2 steps: first, an initial 50 records is written to - * each partition. After that, another 30 records is appended to each partition. Before each - * step, a timestamp is recorded. For the validation, when the read job is configured to start - * from the first timestamp, each partition should start from offset 0 and read a total of 80 - * records. When configured to start from the second timestamp, each partition should start from - * offset 50 and read on the remaining 30 appended records. - */ - public void runStartFromTimestamp() throws Exception { - // 4 partitions with 50 records each - final int parallelism = 4; - final int initialRecordsInEachPartition = 50; - final int appendRecordsInEachPartition = 30; - - // attempt to create an appended test sequence, where the timestamp of writing the appended - // sequence - // is assured to be larger than the timestamp of the original sequence. - long firstTimestamp = System.currentTimeMillis(); - String topic = - writeSequence( - "runStartFromTimestamp", initialRecordsInEachPartition, parallelism, 1); - - long secondTimestamp = 0; - while (secondTimestamp <= firstTimestamp) { - Thread.sleep(1000); - secondTimestamp = System.currentTimeMillis(); - } - writeAppendSequence( - topic, initialRecordsInEachPartition, appendRecordsInEachPartition, parallelism); - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(parallelism); - - Properties readProps = new Properties(); - readProps.putAll(standardProps); - - readSequence( - env, - StartupMode.TIMESTAMP, - null, - firstTimestamp, - readProps, - parallelism, - topic, - initialRecordsInEachPartition + appendRecordsInEachPartition, - 0); - readSequence( - env, - StartupMode.TIMESTAMP, - null, - secondTimestamp, - readProps, - parallelism, - topic, - appendRecordsInEachPartition, - initialRecordsInEachPartition); - - deleteTestTopic(topic); - } - - /** - * Ensure Kafka is working on both producer and consumer side. This executes a job that contains - * two Flink pipelines. - * - *

-     * (generator source) --> (kafka sink)-[KAFKA-TOPIC]-(kafka source) --> (validating sink)
-     *
-     * We need to externally retry this test. We cannot let Flink's retry mechanism do it,
-     * because the Kafka producer does not guarantee exactly-once output. Hence a recovery would
-     * introduce duplicates that cause the test to fail.
-     *
-     * This test also ensures that FLINK-3156 doesn't happen again:
-     *
-     * The following situation caused an NPE in the FlinkKafkaConsumer
-     *
-     * topic-1 <-- elements are only produced into topic1. topic-2
-     *
    Therefore, this test is consuming as well from an empty topic. - */ - @RetryOnException(times = 2, exception = NotLeaderForPartitionException.class) - public void runSimpleConcurrentProducerConsumerTopology() throws Exception { - final String topic = "concurrentProducerConsumerTopic_" + UUID.randomUUID(); - final String additionalEmptyTopic = "additionalEmptyTopic_" + UUID.randomUUID(); - - final int parallelism = 3; - final int elementsPerPartition = 100; - final int totalElements = parallelism * elementsPerPartition; - - createTestTopic(topic, parallelism, 1); - createTestTopic( - additionalEmptyTopic, - parallelism, - 1); // create an empty topic which will remain empty all the time - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(parallelism); - env.enableCheckpointing(500); - env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately - - TypeInformation> longStringType = - TypeInformation.of(new TypeHint>() {}); - - TypeInformationSerializationSchema> sourceSchema = - new TypeInformationSerializationSchema<>(longStringType, env.getConfig()); - - TypeInformationSerializationSchema> sinkSchema = - new TypeInformationSerializationSchema<>(longStringType, env.getConfig()); - - // ----------- add producer dataflow ---------- - - DataStream> stream = - env.addSource( - new RichParallelSourceFunction>() { - - private boolean running = true; - - @Override - public void run(SourceContext> ctx) - throws InterruptedException { - int cnt = - getRuntimeContext().getIndexOfThisSubtask() - * elementsPerPartition; - int limit = cnt + elementsPerPartition; - - while (running && cnt < limit) { - ctx.collect(new Tuple2<>(1000L + cnt, "kafka-" + cnt)); - cnt++; - // we delay data generation a bit so that we are sure that some - // checkpoints are - // triggered (for FLINK-3156) - Thread.sleep(50); - } - } - - @Override - public void cancel() { - running = false; - } - }); - Properties producerProperties = - KafkaUtils.getPropertiesFromBrokerList(brokerConnectionStrings); - producerProperties.setProperty("retries", "3"); - producerProperties.putAll(secureProps); - kafkaServer.produceIntoKafka(stream, topic, sinkSchema, producerProperties, null); - - // ----------- add consumer dataflow ---------- - - List topics = new ArrayList<>(); - topics.add(topic); - topics.add(additionalEmptyTopic); - - Properties props = new Properties(); - props.putAll(standardProps); - props.putAll(secureProps); - DataStreamSource> consuming = - getStream(env, topics, sourceSchema, props); - - consuming - .addSink( - new RichSinkFunction>() { - - private int elCnt = 0; - - private BitSet validator = new BitSet(totalElements); - - @Override - public void invoke(Tuple2 value) throws Exception { - String[] sp = value.f1.split("-"); - int v = Integer.parseInt(sp[1]); - assertThat((long) v).isEqualTo(value.f0 - 1000); - assertThat(validator.get(v)).as("Received tuple twice").isFalse(); - validator.set(v); - elCnt++; - if (elCnt == totalElements) { - // check if everything in the bitset is set to true - int nc; - if ((nc = validator.nextClearBit(0)) != totalElements) { - fail( - "The bitset was not set to 1 on all elements. 
Next clear:" - + nc - + " Set: " - + validator); - } - throw new SuccessException(); - } - } - - @Override - public void close() throws Exception { - super.close(); - } - }) - .setParallelism(1); - - try { - tryExecutePropagateExceptions(env, "runSimpleConcurrentProducerConsumerTopology"); - } catch (ProgramInvocationException | JobExecutionException e) { - // look for NotLeaderForPartitionException - Throwable cause = e.getCause(); - - // search for nested SuccessExceptions - int depth = 0; - while (cause != null && depth++ < 20) { - if (cause instanceof NotLeaderForPartitionException) { - throw (Exception) cause; - } - cause = cause.getCause(); - } - throw e; - } - - deleteTestTopic(topic); - } - - /** - * Tests the proper consumption when having a 1:1 correspondence between kafka partitions and - * Flink sources. - */ - public void runOneToOneExactlyOnceTest() throws Exception { - - final String topic = "oneToOneTopic-" + UUID.randomUUID(); - final int parallelism = 5; - final int numElementsPerPartition = 1000; - final int totalElements = parallelism * numElementsPerPartition; - final int failAfterElements = numElementsPerPartition / 3; - - createTestTopic(topic, parallelism, 1); - - DataGenerators.generateRandomizedIntegerSequence( - StreamExecutionEnvironment.getExecutionEnvironment(), - kafkaServer, - topic, - parallelism, - numElementsPerPartition, - true); - - // run the topology that fails and recovers - - DeserializationSchema schema = - new TypeInformationSerializationSchema<>( - BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig()); - - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.enableCheckpointing(500); - env.setParallelism(parallelism); - env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0)); - - Properties props = new Properties(); - props.putAll(standardProps); - props.putAll(secureProps); - - getStream(env, topic, schema, props) - .map(new PartitionValidatingMapper(parallelism, 1)) - .map(new FailingIdentityMapper(failAfterElements)) - .addSink(new ValidatingExactlyOnceSink(totalElements)) - .setParallelism(1); - - FailingIdentityMapper.failedBefore = false; - tryExecute(env, "One-to-one exactly once test"); - - deleteTestTopic(topic); - } - - /** - * Tests the proper consumption when having fewer Flink sources than Kafka partitions, so one - * Flink source will read multiple Kafka partitions. 
- */ - public void runOneSourceMultiplePartitionsExactlyOnceTest() throws Exception { - final String topic = "oneToManyTopic-" + UUID.randomUUID(); - final int numPartitions = 5; - final int numElementsPerPartition = 1000; - final int totalElements = numPartitions * numElementsPerPartition; - final int failAfterElements = numElementsPerPartition / 3; - - final int parallelism = 2; - - createTestTopic(topic, numPartitions, 1); - - DataGenerators.generateRandomizedIntegerSequence( - StreamExecutionEnvironment.getExecutionEnvironment(), - kafkaServer, - topic, - numPartitions, - numElementsPerPartition, - true); - - // run the topology that fails and recovers - - DeserializationSchema schema = - new TypeInformationSerializationSchema<>( - BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig()); - - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.enableCheckpointing(500); - env.setParallelism(parallelism); - env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0)); - - Properties props = new Properties(); - props.putAll(standardProps); - props.putAll(secureProps); - - getStream(env, topic, schema, props) - .map(new PartitionValidatingMapper(numPartitions, 3)) - .map(new FailingIdentityMapper(failAfterElements)) - .addSink(new ValidatingExactlyOnceSink(totalElements)) - .setParallelism(1); - - FailingIdentityMapper.failedBefore = false; - tryExecute(env, "One-source-multi-partitions exactly once test"); - - deleteTestTopic(topic); - } - - /** - * Tests the proper consumption when having more Flink sources than Kafka partitions, which - * means that some Flink sources will read no partitions. - */ - public void runMultipleSourcesOnePartitionExactlyOnceTest() throws Exception { - final String topic = "manyToOneTopic-" + UUID.randomUUID(); - final int numPartitions = 5; - final int numElementsPerPartition = 1000; - final int totalElements = numPartitions * numElementsPerPartition; - final int failAfterElements = numElementsPerPartition / 3; - - final int parallelism = 8; - - createTestTopic(topic, numPartitions, 1); - - DataGenerators.generateRandomizedIntegerSequence( - StreamExecutionEnvironment.getExecutionEnvironment(), - kafkaServer, - topic, - numPartitions, - numElementsPerPartition, - true); - - // run the topology that fails and recovers - - DeserializationSchema schema = - new TypeInformationSerializationSchema<>( - BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig()); - - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.enableCheckpointing(500); - env.setParallelism(parallelism); - // set the number of restarts to one. The failing mapper will fail once, then it's only - // success exceptions. - env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0)); - env.setBufferTimeout(0); - - Properties props = new Properties(); - props.putAll(standardProps); - props.putAll(secureProps); - - getStream(env, topic, schema, props) - .map(new PartitionValidatingMapper(numPartitions, 1)) - // Job only fails after a checkpoint is taken and the necessary number of elements - // is seen - .map(new FailingIdentityMapper(failAfterElements)) - .addSink(new ValidatingExactlyOnceSink(totalElements, true)) - .setParallelism(1); - - FailingIdentityMapper.failedBefore = false; - tryExecute(env, "multi-source-one-partitions exactly once test"); - - deleteTestTopic(topic); - } - - /** Tests that the source can be properly canceled when reading full partitions. 
*/ - public void runCancelingOnFullInputTest() throws Exception { - final String topic = "cancelingOnFullTopic-" + UUID.randomUUID(); - - final int parallelism = 3; - createTestTopic(topic, parallelism, 1); - - // launch a producer thread - DataGenerators.InfiniteStringsGenerator generator = - new DataGenerators.InfiniteStringsGenerator(kafkaServer, topic); - generator.start(); - - // launch a consumer asynchronously - - final AtomicReference jobError = new AtomicReference<>(); - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(parallelism); - env.enableCheckpointing(100); - - Properties props = new Properties(); - props.putAll(standardProps); - props.putAll(secureProps); - getStream(env, topic, new SimpleStringSchema(), props) - .addSink(new DiscardingSink()); - - JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph()); - final JobID jobId = jobGraph.getJobID(); - - final Runnable jobRunner = - () -> { - try { - submitJobAndWaitForResult(client, jobGraph, getClass().getClassLoader()); - } catch (Throwable t) { - jobError.set(t); - } - }; - - Thread runnerThread = new Thread(jobRunner, "program runner thread"); - runnerThread.start(); - - // wait a bit before canceling - Thread.sleep(2000); - - Throwable failueCause = jobError.get(); - if (failueCause != null) { - failueCause.printStackTrace(); - fail("Test failed prematurely with: " + failueCause.getMessage()); - } - - // cancel - client.cancel(jobId).get(); - - // wait for the program to be done and validate that we failed with the right exception - runnerThread.join(); - - assertThat(client.getJobStatus(jobId).get()).isEqualTo(JobStatus.CANCELED); - - if (generator.isAlive()) { - generator.shutdown(); - generator.join(); - } else { - Throwable t = generator.getError(); - if (t != null) { - t.printStackTrace(); - fail("Generator failed: " + t.getMessage()); - } else { - fail("Generator failed with no exception"); - } - } - - deleteTestTopic(topic); - } - - /** Tests that the source can be properly canceled when reading empty partitions. 
*/ - public void runCancelingOnEmptyInputTest() throws Exception { - final String topic = "cancelingOnEmptyInputTopic-" + UUID.randomUUID(); - - final int parallelism = 3; - createTestTopic(topic, parallelism, 1); - - final AtomicReference error = new AtomicReference<>(); - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(parallelism); - env.enableCheckpointing(100); - - Properties props = new Properties(); - props.putAll(standardProps); - props.putAll(secureProps); - - getStream(env, topic, new SimpleStringSchema(), props) - .addSink(new DiscardingSink()); - - JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph()); - final JobID jobId = jobGraph.getJobID(); - - final Runnable jobRunner = - () -> { - try { - submitJobAndWaitForResult(client, jobGraph, getClass().getClassLoader()); - } catch (Throwable t) { - LOG.error("Job Runner failed with exception", t); - error.set(t); - } - }; - - Thread runnerThread = new Thread(jobRunner, "program runner thread"); - runnerThread.start(); - - // wait a bit before canceling - Thread.sleep(2000); - - Throwable failueCause = error.get(); - if (failueCause != null) { - failueCause.printStackTrace(); - fail("Test failed prematurely with: " + failueCause.getMessage()); - } - // cancel - client.cancel(jobId).get(); - - // wait for the program to be done and validate that we failed with the right exception - runnerThread.join(); - - assertThat(client.getJobStatus(jobId).get()).isEqualTo(JobStatus.CANCELED); - - deleteTestTopic(topic); - } - - /** - * Test producing and consuming into multiple topics. - * - * @throws Exception - */ - public void runProduceConsumeMultipleTopics(boolean useLegacySchema) throws Exception { - final String topicNamePrefix = - "runProduceConsumeMultipleTopics-" + (useLegacySchema ? 
"legacy" : ""); - - final int numTopics = 5; - final int numElements = 20; - - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - - // create topics with content - final List topics = new ArrayList<>(); - for (int i = 0; i < numTopics; i++) { - final String topic = topicNamePrefix + i + "-" + UUID.randomUUID(); - topics.add(topic); - // create topic - createTestTopic(topic, i + 1 /*partitions*/, 1); - } - - // before FLINK-6078 the RemoteExecutionEnvironment set the parallelism to 1 as well - env.setParallelism(1); - - // run first job, producing into all topics - DataStream> stream = - env.addSource( - new RichParallelSourceFunction>() { - - @Override - public void run(SourceContext> ctx) { - int partition = getRuntimeContext().getIndexOfThisSubtask(); - - for (int topicId = 0; topicId < numTopics; topicId++) { - for (int i = 0; i < numElements; i++) { - ctx.collect( - new Tuple3<>(partition, i, topics.get(topicId))); - } - } - } - - @Override - public void cancel() {} - }); - - Properties props = new Properties(); - props.putAll(standardProps); - props.putAll(secureProps); - - if (useLegacySchema) { - Tuple2WithTopicSchema schema = new Tuple2WithTopicSchema(env.getConfig()); - kafkaServer.produceIntoKafka(stream, "dummy", schema, props, null); - } else { - TestDeserializer schema = new TestDeserializer(env.getConfig()); - kafkaServer.produceIntoKafka(stream, "dummy", schema, props); - } - - env.execute("Write to topics"); - - // run second job consuming from multiple topics - env = StreamExecutionEnvironment.getExecutionEnvironment(); - - if (useLegacySchema) { - Tuple2WithTopicSchema schema = new Tuple2WithTopicSchema(env.getConfig()); - stream = getStream(env, topics, schema, props); - } else { - TestDeserializer schema = new TestDeserializer(env.getConfig()); - stream = getStream(env, topics, schema, props); - } - - stream.flatMap( - new FlatMapFunction, Integer>() { - Map countPerTopic = new HashMap<>(numTopics); - - @Override - public void flatMap( - Tuple3 value, Collector out) - throws Exception { - Integer count = countPerTopic.get(value.f2); - if (count == null) { - count = 1; - } else { - count++; - } - countPerTopic.put(value.f2, count); - - // check map: - for (Map.Entry el : countPerTopic.entrySet()) { - if (el.getValue() < numElements) { - break; // not enough yet - } - if (el.getValue() > numElements) { - throw new RuntimeException( - "There is a failure in the test. I've read " - + el.getValue() - + " from topic " - + el.getKey()); - } - } - // we've seen messages from all topics - throw new SuccessException(); - } - }) - .setParallelism(1); - - tryExecute(env, "Count elements from the topics"); - - // delete all topics again - for (String topic : topics) { - deleteTestTopic(topic); - } - } - - /** - * Test Flink's Kafka integration also with very big records (30MB). - * - *

    see http://stackoverflow.com/questions/21020347/kafka-sending-a-15mb-message - */ - public void runBigRecordTestTopology() throws Exception { - - final String topic = "bigRecordTestTopic-" + UUID.randomUUID(); - final int parallelism = 1; // otherwise, the kafka mini clusters may run out of heap space - - createTestTopic(topic, parallelism, 1); - - final TypeInformation> longBytesInfo = - TypeInformation.of(new TypeHint>() {}); - - final TypeInformationSerializationSchema> serSchema = - new TypeInformationSerializationSchema<>(longBytesInfo, new ExecutionConfig()); - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setRestartStrategy(RestartStrategies.noRestart()); - env.enableCheckpointing(100); - env.setParallelism(parallelism); - - // add consuming topology: - Properties consumerProps = new Properties(); - consumerProps.putAll(standardProps); - consumerProps.setProperty("fetch.message.max.bytes", Integer.toString(1024 * 1024 * 14)); - consumerProps.setProperty( - "max.partition.fetch.bytes", - Integer.toString(1024 * 1024 * 14)); // for the new fetcher - consumerProps.setProperty("queued.max.message.chunks", "1"); - consumerProps.putAll(secureProps); - - DataStreamSource> consuming = - getStream(env, topic, serSchema, consumerProps); - - consuming.addSink( - new SinkFunction>() { - - private int elCnt = 0; - - @Override - public void invoke(Tuple2 value) throws Exception { - elCnt++; - if (value.f0 == -1) { - // we should have seen 11 elements now. - if (elCnt == 11) { - throw new SuccessException(); - } else { - throw new RuntimeException( - "There have been " + elCnt + " elements"); - } - } - if (elCnt > 10) { - throw new RuntimeException("More than 10 elements seen: " + elCnt); - } - } - }); - - // add producing topology - Properties producerProps = new Properties(); - producerProps.setProperty("max.request.size", Integer.toString(1024 * 1024 * 15)); - producerProps.setProperty("retries", "3"); - producerProps.putAll(secureProps); - producerProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerConnectionStrings); - - DataStream> stream = - env.addSource( - new RichSourceFunction>() { - - private boolean running; - - @Override - public void open(Configuration parameters) throws Exception { - super.open(parameters); - running = true; - } - - @Override - public void run(SourceContext> ctx) - throws Exception { - Random rnd = new Random(); - long cnt = 0; - int sevenMb = 1024 * 1024 * 7; - - while (running) { - byte[] wl = new byte[sevenMb + rnd.nextInt(sevenMb)]; - ctx.collect(new Tuple2<>(cnt++, wl)); - - Thread.sleep(100); - - if (cnt == 10) { - // signal end - ctx.collect(new Tuple2<>(-1L, new byte[] {1})); - break; - } - } - } - - @Override - public void cancel() { - running = false; - } - }); - - kafkaServer.produceIntoKafka(stream, topic, serSchema, producerProps, null); - - tryExecute(env, "big topology test"); - deleteTestTopic(topic); - } - - public void runBrokerFailureTest() throws Exception { - final String topic = "brokerFailureTestTopic"; - - // Start a temporary multi-broker cluster. - // This test case relies on stopping a broker and switching partition leader to another - // during the test, so single-broker cluster (kafkaServer) could not fulfill the - // requirement. 
- KafkaTestEnvironment multiBrokerCluster = constructKafkaTestEnvironment(); - multiBrokerCluster.prepare(KafkaTestEnvironment.createConfig().setKafkaServersNumber(3)); - - final int parallelism = 2; - final int numElementsPerPartition = 1000; - final int totalElements = parallelism * numElementsPerPartition; - final int failAfterElements = numElementsPerPartition / 3; - - multiBrokerCluster.createTestTopic(topic, parallelism, 2); - - DataGenerators.generateRandomizedIntegerSequence( - StreamExecutionEnvironment.getExecutionEnvironment(), - multiBrokerCluster, - topic, - parallelism, - numElementsPerPartition, - true); - - // find leader to shut down - int leaderId = multiBrokerCluster.getLeaderToShutDown(topic); - - LOG.info("Leader to shutdown {}", leaderId); - - // run the topology (the consumers must handle the failures) - - DeserializationSchema schema = - new TypeInformationSerializationSchema<>( - BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig()); - - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(parallelism); - env.enableCheckpointing(500); - env.setRestartStrategy(RestartStrategies.noRestart()); - - Properties props = new Properties(); - props.putAll(multiBrokerCluster.getStandardProperties()); - props.putAll(multiBrokerCluster.getSecureProperties()); - - getStream(env, topic, schema, props) - .map(new PartitionValidatingMapper(parallelism, 1)) - .map(new BrokerKillingMapper<>(multiBrokerCluster, leaderId, failAfterElements)) - .addSink(new ValidatingExactlyOnceSink(totalElements)) - .setParallelism(1); - - try { - BrokerKillingMapper.killedLeaderBefore = false; - tryExecute(env, "Broker failure once test"); - } finally { - // Tear down the temporary cluster anyway - multiBrokerCluster.shutdown(); - } - } - - public void runKeyValueTest() throws Exception { - final String topic = "keyvaluetest-" + UUID.randomUUID(); - createTestTopic(topic, 1, 1); - final int elementCount = 5000; - - // ----------- Write some data into Kafka ------------------- - - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - env.setRestartStrategy(RestartStrategies.noRestart()); - - DataStream> kvStream = - env.addSource( - new SourceFunction>() { - @Override - public void run(SourceContext> ctx) - throws Exception { - Random rnd = new Random(1337); - for (long i = 0; i < elementCount; i++) { - PojoValue pojo = new PojoValue(); - pojo.when = new Date(rnd.nextLong()); - pojo.lon = rnd.nextLong(); - pojo.lat = i; - // make every second key null to ensure proper "null" - // serialization - Long key = (i % 2 == 0) ? 
null : i; - ctx.collect(new Tuple2<>(key, pojo)); - } - } - - @Override - public void cancel() {} - }); - - KeyedSerializationSchema> schema = - new TypeInformationKeyValueSerializationSchema<>( - Long.class, PojoValue.class, env.getConfig()); - Properties producerProperties = - KafkaUtils.getPropertiesFromBrokerList(brokerConnectionStrings); - producerProperties.setProperty("retries", "3"); - kafkaServer.produceIntoKafka(kvStream, topic, schema, producerProperties, null); - env.execute("Write KV to Kafka"); - - // ----------- Read the data again ------------------- - - env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - env.setRestartStrategy(RestartStrategies.noRestart()); - - KafkaDeserializationSchema> readSchema = - new TypeInformationKeyValueSerializationSchema<>( - Long.class, PojoValue.class, env.getConfig()); - - Properties props = new Properties(); - props.putAll(standardProps); - props.putAll(secureProps); - DataStream> fromKafka = getStream(env, topic, readSchema, props); - fromKafka.flatMap( - new RichFlatMapFunction, Object>() { - - long counter = 0; - - @Override - public void flatMap(Tuple2 value, Collector out) - throws Exception { - // the elements should be in order. - assertThat(value.f1.lat) - .as("Wrong value " + value.f1.lat) - .isEqualTo(counter); - if (value.f1.lat % 2 == 0) { - assertThat(value.f0).as("key was not null").isNull(); - } else { - assertThat(value.f0).as("Wrong value " + value.f0).isEqualTo(counter); - } - counter++; - if (counter == elementCount) { - // we got the right number of elements - throw new SuccessException(); - } - } - }); - - tryExecute(env, "Read KV from Kafka"); - - deleteTestTopic(topic); - } - - private static class PojoValue { - public Date when; - public long lon; - public long lat; - - public PojoValue() {} - } - - /** - * Test delete behavior and metrics for producer. 
- * - * @throws Exception - */ - public void runAllDeletesTest() throws Exception { - final String topic = "alldeletestest-" + UUID.randomUUID(); - createTestTopic(topic, 1, 1); - final int elementCount = 300; - - // ----------- Write some data into Kafka ------------------- - - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); - - DataStream> kvStream = - env.addSource( - new SourceFunction>() { - @Override - public void run(SourceContext> ctx) - throws Exception { - Random rnd = new Random(1337); - for (long i = 0; i < elementCount; i++) { - final byte[] key = new byte[200]; - rnd.nextBytes(key); - ctx.collect(new Tuple2<>(key, (PojoValue) null)); - } - } - - @Override - public void cancel() {} - }); - - TypeInformationKeyValueSerializationSchema schema = - new TypeInformationKeyValueSerializationSchema<>( - byte[].class, PojoValue.class, env.getConfig()); - - Properties producerProperties = - KafkaUtils.getPropertiesFromBrokerList(brokerConnectionStrings); - producerProperties.setProperty("retries", "3"); - producerProperties.putAll(secureProps); - kafkaServer.produceIntoKafka(kvStream, topic, schema, producerProperties, null); - - env.execute("Write deletes to Kafka"); - - // ----------- Read the data again ------------------- - - env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); - - Properties props = new Properties(); - props.putAll(standardProps); - props.putAll(secureProps); - DataStream> fromKafka = getStream(env, topic, schema, props); - - fromKafka.flatMap( - new RichFlatMapFunction, Object>() { - - long counter = 0; - - @Override - public void flatMap(Tuple2 value, Collector out) - throws Exception { - // ensure that deleted messages are passed as nulls - assertThat(value.f1).isNull(); - counter++; - if (counter == elementCount) { - // we got the right number of elements - throw new SuccessException(); - } - } - }); - - tryExecute(env, "Read deletes from Kafka"); - - deleteTestTopic(topic); - } - - /** - * Test that ensures that DeserializationSchema.isEndOfStream() is properly evaluated. - * - * @throws Exception - */ - public void runEndOfStreamTest() throws Exception { - - final int elementCount = 300; - final String topic = writeSequence("testEndOfStream", elementCount, 1, 1); - - // read using custom schema - final StreamExecutionEnvironment env1 = - StreamExecutionEnvironment.getExecutionEnvironment(); - env1.setParallelism(1); - env1.getConfig().setRestartStrategy(RestartStrategies.noRestart()); - - Properties props = new Properties(); - props.putAll(standardProps); - props.putAll(secureProps); - DataStream> fromKafka = - getStream(env1, topic, new FixedNumberDeserializationSchema(elementCount), props); - - fromKafka.flatMap( - new FlatMapFunction, Void>() { - @Override - public void flatMap(Tuple2 value, Collector out) - throws Exception { - // noop ;) - } - }); - - tryExecute(env1, "Consume " + elementCount + " elements from Kafka"); - - deleteTestTopic(topic); - } - - /** - * Test that ensures that DeserializationSchema can emit multiple records via a Collector. 
- * - * @throws Exception - */ - public void runCollectingSchemaTest() throws Exception { - - final int elementCount = 20; - final String topic = writeSequence("testCollectingSchema", elementCount, 1, 1); - - // read using custom schema - final StreamExecutionEnvironment env1 = - StreamExecutionEnvironment.getExecutionEnvironment(); - env1.setParallelism(1); - env1.getConfig().setRestartStrategy(RestartStrategies.noRestart()); - - Properties props = new Properties(); - props.putAll(standardProps); - props.putAll(secureProps); - - DataStream> fromKafka = - env1.addSource( - kafkaServer - .getConsumer( - topic, - new CollectingDeserializationSchema(elementCount), - props) - .assignTimestampsAndWatermarks( - new AscendingTimestampExtractor>() { - @Override - public long extractAscendingTimestamp( - Tuple2 element) { - String string = element.f1; - return Long.parseLong( - string.substring(0, string.length() - 1)); - } - })); - fromKafka - .keyBy(t -> t.f0) - .process( - new KeyedProcessFunction, Void>() { - private boolean registered = false; - - @Override - public void processElement( - Tuple2 value, Context ctx, Collector out) - throws Exception { - if (!registered) { - ctx.timerService().registerEventTimeTimer(elementCount - 2); - registered = true; - } - } - - @Override - public void onTimer( - long timestamp, OnTimerContext ctx, Collector out) - throws Exception { - throw new SuccessException(); - } - }); - - tryExecute(env1, "Consume " + elementCount + " elements from Kafka"); - - deleteTestTopic(topic); - } - - /** - * Test metrics reporting for consumer. - * - * @throws Exception - */ - public void runMetricsTest() throws Throwable { - - // create a stream with 5 topics - final String topic = "metricsStream-" + UUID.randomUUID(); - createTestTopic(topic, 5, 1); - - final Tuple1 error = new Tuple1<>(null); - - // start job writing & reading data. 
- final StreamExecutionEnvironment env1 = - StreamExecutionEnvironment.getExecutionEnvironment(); - env1.setParallelism(1); - env1.getConfig().setRestartStrategy(RestartStrategies.noRestart()); - env1.disableOperatorChaining(); // let the source read everything into the network buffers - - TypeInformationSerializationSchema> schema = - new TypeInformationSerializationSchema<>( - TypeInformation.of(new TypeHint>() {}), - env1.getConfig()); - - DataStream> fromKafka = - getStream(env1, topic, schema, standardProps); - fromKafka.flatMap( - new FlatMapFunction, Void>() { - @Override - public void flatMap(Tuple2 value, Collector out) - throws Exception { // no op - } - }); - - DataStream> fromGen = - env1.addSource( - new RichSourceFunction>() { - boolean running = true; - - @Override - public void run(SourceContext> ctx) - throws Exception { - int i = 0; - while (running) { - ctx.collect( - Tuple2.of( - i++, - getRuntimeContext().getIndexOfThisSubtask())); - Thread.sleep(1); - } - } - - @Override - public void cancel() { - running = false; - } - }); - - kafkaServer.produceIntoKafka(fromGen, topic, schema, standardProps, null); - - JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env1.getStreamGraph()); - final JobID jobId = jobGraph.getJobID(); - - Thread jobThread = - new Thread( - () -> { - try { - submitJobAndWaitForResult( - client, jobGraph, getClass().getClassLoader()); - } catch (Throwable t) { - if (!ExceptionUtils.findThrowable(t, JobCancellationException.class) - .isPresent()) { - LOG.warn("Got exception during execution", t); - error.f0 = t; - } - } - }); - jobThread.start(); - - try { - // connect to JMX - MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer(); - // wait until we've found all 5 offset metrics - Set offsetMetrics = - mBeanServer.queryNames(new ObjectName("*current-offsets*:*"), null); - while (offsetMetrics.size() - < 5) { // test will time out if metrics are not properly working - if (error.f0 != null) { - // fail test early - throw error.f0; - } - offsetMetrics = mBeanServer.queryNames(new ObjectName("*current-offsets*:*"), null); - Thread.sleep(50); - } - assertThat(offsetMetrics).hasSize(5); - // we can't rely on the consumer to have touched all the partitions already - // that's why we'll wait until all five partitions have a positive offset. - // The test will fail if we never meet the condition - while (true) { - int numPosOffsets = 0; - // check that offsets are correctly reported - for (ObjectName object : offsetMetrics) { - Object offset = mBeanServer.getAttribute(object, "Value"); - if ((long) offset >= 0) { - numPosOffsets++; - } - } - if (numPosOffsets == 5) { - break; - } - // wait for the consumer to consume on all partitions - Thread.sleep(50); - } - - // check if producer metrics are also available. - Set producerMetrics = - mBeanServer.queryNames(new ObjectName("*KafkaProducer*:*"), null); - assertThat(producerMetrics.size()).as("No producer metrics found").isGreaterThan(30); - - LOG.info("Found all JMX metrics. 
Cancelling job."); - } finally { - // cancel - client.cancel(jobId).get(); - // wait for the job to finish (it should due to the cancel command above) - jobThread.join(); - } - - if (error.f0 != null) { - throw error.f0; - } - - deleteTestTopic(topic); - } - - private static class CollectingDeserializationSchema - implements KafkaDeserializationSchema> { - - final int finalCount; - - TypeInformation> ti = - TypeInformation.of(new TypeHint>() {}); - TypeSerializer> ser = - TypeInformation.of(new TypeHint>() {}) - .createSerializer(new ExecutionConfig()); - - public CollectingDeserializationSchema(int finalCount) { - this.finalCount = finalCount; - } - - @Override - public boolean isEndOfStream(Tuple2 nextElement) { - return false; - } - - @Override - public Tuple2 deserialize(ConsumerRecord record) - throws Exception { - throw new UnsupportedOperationException("Should not be called"); - } - - @Override - public void deserialize( - ConsumerRecord message, Collector> out) - throws Exception { - DataInputView in = - new DataInputViewStreamWrapper(new ByteArrayInputStream(message.value())); - Tuple2 tuple = ser.deserialize(in); - out.collect(Tuple2.of(tuple.f0, tuple.f1 + "a")); - out.collect(Tuple2.of(tuple.f0, tuple.f1 + "b")); - } - - @Override - public TypeInformation> getProducedType() { - return ti; - } - } - - private static class FixedNumberDeserializationSchema - implements DeserializationSchema> { - - final int finalCount; - int count = 0; - - TypeInformation> ti = - TypeInformation.of(new TypeHint>() {}); - TypeSerializer> ser = ti.createSerializer(new ExecutionConfig()); - - public FixedNumberDeserializationSchema(int finalCount) { - this.finalCount = finalCount; - } - - @Override - public Tuple2 deserialize(byte[] message) throws IOException { - DataInputView in = new DataInputViewStreamWrapper(new ByteArrayInputStream(message)); - return ser.deserialize(in); - } - - @Override - public boolean isEndOfStream(Tuple2 nextElement) { - return ++count >= finalCount; - } - - @Override - public TypeInformation> getProducedType() { - return ti; - } - } - - // ------------------------------------------------------------------------ - // Reading writing test data sets - // ------------------------------------------------------------------------ - - /** - * Runs a job using the provided environment to read a sequence of records from a single Kafka - * topic. The method allows to individually specify the expected starting offset and total read - * value count of each partition. The job will be considered successful only if all partition - * read results match the start offset and value count criteria. 
- */ - protected void readSequence( - final StreamExecutionEnvironment env, - final StartupMode startupMode, - final Map specificStartupOffsets, - final Long startupTimestamp, - final Properties cc, - final String topicName, - final Map> partitionsToValuesCountAndStartOffset) - throws Exception { - final int sourceParallelism = partitionsToValuesCountAndStartOffset.keySet().size(); - - int finalCountTmp = 0; - for (Map.Entry> valuesCountAndStartOffset : - partitionsToValuesCountAndStartOffset.entrySet()) { - finalCountTmp += valuesCountAndStartOffset.getValue().f0; - } - final int finalCount = finalCountTmp; - - final TypeInformation> intIntTupleType = - TypeInformation.of(new TypeHint>() {}); - - final TypeInformationSerializationSchema> deser = - new TypeInformationSerializationSchema<>(intIntTupleType, env.getConfig()); - - // create the consumer - cc.putAll(secureProps); - DataStreamSource> source; - if (useNewSource) { - KafkaSourceBuilder> sourceBuilder = - kafkaServer.getSourceBuilder(topicName, deser, cc); - Map startOffsets = new HashMap<>(); - if (specificStartupOffsets != null) { - specificStartupOffsets.forEach( - (ktp, offset) -> - startOffsets.put( - new TopicPartition(ktp.getTopic(), ktp.getPartition()), - offset)); - } - setKafkaSourceOffset(startupMode, sourceBuilder, startOffsets, startupTimestamp); - source = - env.fromSource( - sourceBuilder.build(), WatermarkStrategy.noWatermarks(), "KafkaSource"); - } else { - FlinkKafkaConsumerBase> consumer = - kafkaServer.getConsumer(topicName, deser, cc); - setKafkaConsumerOffset(startupMode, consumer, specificStartupOffsets, startupTimestamp); - - source = env.addSource(consumer); - } - - source.setParallelism(sourceParallelism) - .map(new ThrottledMapper<>(20)) - .setParallelism(sourceParallelism) - .flatMap( - new RichFlatMapFunction, Integer>() { - private HashMap partitionsToValueCheck; - private int count = 0; - - @Override - public void open(Configuration parameters) throws Exception { - partitionsToValueCheck = new HashMap<>(); - for (Integer partition : - partitionsToValuesCountAndStartOffset.keySet()) { - partitionsToValueCheck.put(partition, new BitSet()); - } - } - - @Override - public void flatMap( - Tuple2 value, Collector out) - throws Exception { - int partition = value.f0; - int val = value.f1; - - BitSet bitSet = partitionsToValueCheck.get(partition); - if (bitSet == null) { - throw new RuntimeException( - "Got a record from an unknown partition"); - } else { - bitSet.set( - val - - partitionsToValuesCountAndStartOffset.get( - partition) - .f1); - } - - count++; - - LOG.debug("Received message {}, total {} messages", value, count); - - // verify if we've seen everything - if (count == finalCount) { - for (Map.Entry partitionsToValueCheck : - this.partitionsToValueCheck.entrySet()) { - BitSet check = partitionsToValueCheck.getValue(); - int expectedValueCount = - partitionsToValuesCountAndStartOffset.get( - partitionsToValueCheck.getKey()) - .f0; - - if (check.cardinality() != expectedValueCount) { - throw new RuntimeException( - "Expected cardinality to be " - + expectedValueCount - + ", but was " - + check.cardinality()); - } else if (check.nextClearBit(0) != expectedValueCount) { - throw new RuntimeException( - "Expected next clear bit to be " - + expectedValueCount - + ", but was " - + check.cardinality()); - } - } - - // test has passed - throw new SuccessException(); - } - } - }) - .setParallelism(1); - - tryExecute(env, "Read data from Kafka"); - - LOG.info("Successfully read sequence for verification"); - 
} - - /** - * Variant of {@link KafkaConsumerTestBase#readSequence(StreamExecutionEnvironment, StartupMode, - * Map, Long, Properties, String, Map)} to expect reading from the same start offset and the - * same value count for all partitions of a single Kafka topic. - */ - protected void readSequence( - final StreamExecutionEnvironment env, - final StartupMode startupMode, - final Map specificStartupOffsets, - final Long startupTimestamp, - final Properties cc, - final int sourceParallelism, - final String topicName, - final int valuesCount, - final int startFrom) - throws Exception { - HashMap> partitionsToValuesCountAndStartOffset = - new HashMap<>(); - for (int i = 0; i < sourceParallelism; i++) { - partitionsToValuesCountAndStartOffset.put(i, new Tuple2<>(valuesCount, startFrom)); - } - readSequence( - env, - startupMode, - specificStartupOffsets, - startupTimestamp, - cc, - topicName, - partitionsToValuesCountAndStartOffset); - } - - protected void setKafkaConsumerOffset( - final StartupMode startupMode, - final FlinkKafkaConsumerBase> consumer, - final Map specificStartupOffsets, - final Long startupTimestamp) { - switch (startupMode) { - case EARLIEST: - consumer.setStartFromEarliest(); - break; - case LATEST: - consumer.setStartFromLatest(); - break; - case SPECIFIC_OFFSETS: - consumer.setStartFromSpecificOffsets(specificStartupOffsets); - break; - case GROUP_OFFSETS: - consumer.setStartFromGroupOffsets(); - break; - case TIMESTAMP: - consumer.setStartFromTimestamp(startupTimestamp); - break; - } - } - - protected void setKafkaSourceOffset( - final StartupMode startupMode, - final KafkaSourceBuilder kafkaSourceBuilder, - final Map specificStartupOffsets, - final Long startupTimestamp) { - switch (startupMode) { - case EARLIEST: - kafkaSourceBuilder.setStartingOffsets(OffsetsInitializer.earliest()); - break; - case LATEST: - kafkaSourceBuilder.setStartingOffsets(OffsetsInitializer.latest()); - break; - case SPECIFIC_OFFSETS: - kafkaSourceBuilder.setStartingOffsets( - OffsetsInitializer.offsets(specificStartupOffsets)); - break; - case GROUP_OFFSETS: - kafkaSourceBuilder.setStartingOffsets( - OffsetsInitializer.committedOffsets(OffsetResetStrategy.EARLIEST)); - break; - case TIMESTAMP: - kafkaSourceBuilder.setStartingOffsets( - OffsetsInitializer.timestamp(startupTimestamp)); - break; - } - } - - protected String writeSequence( - String baseTopicName, - final int numElements, - final int parallelism, - final int replicationFactor) - throws Exception { - LOG.info( - "\n===================================\n" - + "== Writing sequence of " - + numElements - + " into " - + baseTopicName - + " with p=" - + parallelism - + "\n" - + "==================================="); - - final TypeInformation> resultType = - TypeInformation.of(new TypeHint>() {}); - - final SerializationSchema> serSchema = - new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()); - - final KafkaDeserializationSchema> deserSchema = - new KafkaDeserializationSchemaWrapper<>( - new TypeInformationSerializationSchema<>( - resultType, new ExecutionConfig())); - - final int maxNumAttempts = 10; - - for (int attempt = 1; attempt <= maxNumAttempts; attempt++) { - - final String topicName = baseTopicName + '-' + attempt + '-' + UUID.randomUUID(); - - LOG.info("Writing attempt #" + attempt); - - // -------- Write the Sequence -------- - - createTestTopic(topicName, parallelism, replicationFactor); - - StreamExecutionEnvironment writeEnv = - StreamExecutionEnvironment.getExecutionEnvironment(); - 
writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart()); - DataStream> stream = - writeEnv.addSource( - new RichParallelSourceFunction>() { - - private boolean running = true; - - @Override - public void run(SourceContext> ctx) - throws Exception { - int cnt = 0; - int partition = - getRuntimeContext().getIndexOfThisSubtask(); - - while (running && cnt < numElements) { - ctx.collect(new Tuple2<>(partition, cnt)); - cnt++; - } - } - - @Override - public void cancel() { - running = false; - } - }) - .setParallelism(parallelism); - - // the producer must not produce duplicates - Properties producerProperties = - KafkaUtils.getPropertiesFromBrokerList(brokerConnectionStrings); - producerProperties.setProperty("retries", "0"); - producerProperties.putAll(secureProps); - - kafkaServer - .produceIntoKafka( - stream, - topicName, - serSchema, - producerProperties, - new Tuple2FlinkPartitioner(parallelism)) - .setParallelism(parallelism); - - try { - writeEnv.execute("Write sequence"); - } catch (Exception e) { - LOG.error("Write attempt failed, trying again", e); - deleteTestTopic(topicName); - waitUntilNoJobIsRunning(client); - continue; - } - - LOG.info("Finished writing sequence"); - - // -------- Validate the Sequence -------- - - // we need to validate the sequence, because kafka's producers are not exactly once - LOG.info("Validating sequence"); - - waitUntilNoJobIsRunning(client); - - if (validateSequence(topicName, parallelism, deserSchema, numElements)) { - // everything is good! - return topicName; - } else { - deleteTestTopic(topicName); - // fall through the loop - } - } - - throw new Exception( - "Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts"); - } - - protected void writeAppendSequence( - String topicName, - final int originalNumElements, - final int numElementsToAppend, - final int parallelism) - throws Exception { - - LOG.info( - "\n===================================\n" - + "== Appending sequence of " - + numElementsToAppend - + " into " - + topicName - + "==================================="); - - final TypeInformation> resultType = - TypeInformation.of(new TypeHint>() {}); - - final SerializationSchema> serSchema = - new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()); - - final KafkaDeserializationSchema> deserSchema = - new KafkaDeserializationSchemaWrapper<>( - new TypeInformationSerializationSchema<>( - resultType, new ExecutionConfig())); - - // -------- Write the append sequence -------- - - StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment(); - writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart()); - DataStream> stream = - writeEnv.addSource( - new RichParallelSourceFunction>() { - - private boolean running = true; - - @Override - public void run(SourceContext> ctx) - throws Exception { - int cnt = originalNumElements; - int partition = getRuntimeContext().getIndexOfThisSubtask(); - - while (running - && cnt - < numElementsToAppend - + originalNumElements) { - ctx.collect(new Tuple2<>(partition, cnt)); - cnt++; - } - } - - @Override - public void cancel() { - running = false; - } - }) - .setParallelism(parallelism); - - // the producer must not produce duplicates - Properties producerProperties = - KafkaUtils.getPropertiesFromBrokerList(brokerConnectionStrings); - producerProperties.setProperty("retries", "0"); - producerProperties.putAll(secureProps); - - kafkaServer - .produceIntoKafka( - stream, - topicName, - serSchema, - 
producerProperties, - new Tuple2FlinkPartitioner(parallelism)) - .setParallelism(parallelism); - - try { - writeEnv.execute("Write sequence"); - } catch (Exception e) { - throw new Exception("Failed to append sequence to Kafka; append job failed.", e); - } - - LOG.info("Finished writing append sequence"); - - // we need to validate the sequence, because kafka's producers are not exactly once - LOG.info("Validating sequence"); - while (!getRunningJobs(client).isEmpty()) { - Thread.sleep(50); - } - - if (!validateSequence( - topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) { - throw new Exception("Could not append a valid sequence to Kafka."); - } - } - - private boolean validateSequence( - final String topic, - final int parallelism, - KafkaDeserializationSchema> deserSchema, - final int totalNumElements) - throws Exception { - - final StreamExecutionEnvironment readEnv = - StreamExecutionEnvironment.getExecutionEnvironment(); - readEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart()); - readEnv.setParallelism(parallelism); - - Properties readProps = (Properties) standardProps.clone(); - readProps.setProperty("group.id", "flink-tests-validator"); - readProps.putAll(secureProps); - DataStreamSource> dataStreamSource; - - if (useNewSource) { - KafkaSource> source = - kafkaServer - .getSourceBuilder(topic, deserSchema, readProps) - .setStartingOffsets(OffsetsInitializer.earliest()) - .build(); - dataStreamSource = - readEnv.fromSource(source, WatermarkStrategy.noWatermarks(), "KafkaSource"); - } else { - FlinkKafkaConsumerBase> consumer = - kafkaServer.getConsumer(topic, deserSchema, readProps); - consumer.setStartFromEarliest(); - dataStreamSource = readEnv.addSource(consumer); - } - - dataStreamSource - .map( - new RichMapFunction, Tuple2>() { - - private final int totalCount = parallelism * totalNumElements; - private int count = 0; - - @Override - public Tuple2 map(Tuple2 value) - throws Exception { - if (++count == totalCount) { - throw new SuccessException(); - } else { - return value; - } - } - }) - .setParallelism(1) - .addSink(new DiscardingSink<>()) - .setParallelism(1); - - final AtomicReference errorRef = new AtomicReference<>(); - - JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(readEnv.getStreamGraph()); - final JobID jobId = jobGraph.getJobID(); - - Thread runner = - new Thread( - () -> { - try { - submitJobAndWaitForResult( - client, jobGraph, getClass().getClassLoader()); - tryExecute(readEnv, "sequence validation"); - } catch (Throwable t) { - if (!ExceptionUtils.findThrowable(t, SuccessException.class) - .isPresent()) { - errorRef.set(t); - } - } - }); - runner.start(); - - final long deadline = System.nanoTime() + 10_000_000_000L; - long delay; - while (runner.isAlive() && (delay = deadline - System.nanoTime()) > 0) { - runner.join(delay / 1_000_000L); - } - - boolean success; - - if (runner.isAlive()) { - // did not finish in time, maybe the producer dropped one or more records and - // the validation did not reach the exit point - success = false; - client.cancel(jobId).get(); - } else { - Throwable error = errorRef.get(); - if (error != null) { - success = false; - LOG.info("Sequence validation job failed with exception", error); - } else { - success = true; - } - } - - waitUntilNoJobIsRunning(client); - - return success; - } - - private DataStreamSource getStream( - StreamExecutionEnvironment env, - String topic, - DeserializationSchema schema, - Properties props) { - return getStream(env, 
Collections.singletonList(topic), schema, props); - } - - private DataStreamSource getStream( - StreamExecutionEnvironment env, - String topic, - KafkaDeserializationSchema schema, - Properties props) { - return getStream(env, Collections.singletonList(topic), schema, props); - } - - private DataStreamSource getStream( - StreamExecutionEnvironment env, - List topics, - DeserializationSchema schema, - Properties props) { - if (useNewSource) { - KafkaSource kafkaSource = - kafkaServer.getSourceBuilder(topics, schema, props).build(); - return env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "KafkaSource"); - } else { - FlinkKafkaConsumerBase flinkKafkaConsumer = - kafkaServer.getConsumer(topics, schema, props); - return env.addSource(flinkKafkaConsumer); - } - } - - private DataStreamSource getStream( - StreamExecutionEnvironment env, - List topics, - KafkaDeserializationSchema schema, - Properties props) { - if (useNewSource) { - KafkaSource kafkaSource = - kafkaServer.getSourceBuilder(topics, schema, props).build(); - return env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "KafkaSource"); - } else { - FlinkKafkaConsumerBase flinkKafkaConsumer = - kafkaServer.getConsumer(topics, schema, props); - return env.addSource(flinkKafkaConsumer); - } - } - - // ------------------------------------------------------------------------ - // Debugging utilities - // ------------------------------------------------------------------------ - - private static class BrokerKillingMapper extends RichMapFunction - implements ListCheckpointed, CheckpointListener { - - private static final long serialVersionUID = 6334389850158707313L; - - public static volatile boolean killedLeaderBefore; - public static volatile boolean hasBeenCheckpointedBeforeFailure; - - private static KafkaTestEnvironment kafkaServerToKill; - private final int shutdownBrokerId; - private final int failCount; - private int numElementsTotal; - - private boolean failer; - private boolean hasBeenCheckpointed; - - public BrokerKillingMapper( - KafkaTestEnvironment kafkaServer, int shutdownBrokerId, int failCount) { - kafkaServerToKill = kafkaServer; - this.shutdownBrokerId = shutdownBrokerId; - this.failCount = failCount; - } - - @Override - public void open(Configuration parameters) { - failer = getRuntimeContext().getIndexOfThisSubtask() == 0; - } - - @Override - public T map(T value) throws Exception { - numElementsTotal++; - - if (!killedLeaderBefore) { - Thread.sleep(10); - - if (failer && numElementsTotal >= failCount) { - // shut down a Kafka broker - kafkaServerToKill.stopBroker(shutdownBrokerId); - hasBeenCheckpointedBeforeFailure = hasBeenCheckpointed; - killedLeaderBefore = true; - } - } - return value; - } - - @Override - public void notifyCheckpointComplete(long checkpointId) { - hasBeenCheckpointed = true; - } - - @Override - public void notifyCheckpointAborted(long checkpointId) {} - - @Override - public List snapshotState(long checkpointId, long timestamp) throws Exception { - return Collections.singletonList(this.numElementsTotal); - } - - @Override - public void restoreState(List state) throws Exception { - if (state.isEmpty() || state.size() > 1) { - throw new RuntimeException( - "Test failed due to unexpected recovered state size " + state.size()); - } - this.numElementsTotal = state.get(0); - } - } - - private abstract static class AbstractTestDeserializer - implements KafkaDeserializationSchema> { - - protected final TypeSerializer> ts; - - public AbstractTestDeserializer(ExecutionConfig ec) { - ts 
= - TypeInformation.of(new TypeHint>() {}) - .createSerializer(ec); - } - - @Override - public Tuple3 deserialize(ConsumerRecord record) - throws Exception { - DataInputView in = - new DataInputViewStreamWrapper(new ByteArrayInputStream(record.value())); - Tuple2 t2 = ts.deserialize(in); - return new Tuple3<>(t2.f0, t2.f1, record.topic()); - } - - @Override - public boolean isEndOfStream(Tuple3 nextElement) { - return false; - } - - @Override - public TypeInformation> getProducedType() { - return TypeInformation.of(new TypeHint>() {}); - } - } - - private static class Tuple2WithTopicSchema extends AbstractTestDeserializer - implements KeyedSerializationSchema> { - - public Tuple2WithTopicSchema(ExecutionConfig ec) { - super(ec); - } - - @Override - public byte[] serializeKey(Tuple3 element) { - return null; - } - - @Override - public byte[] serializeValue(Tuple3 element) { - ByteArrayOutputStream by = new ByteArrayOutputStream(); - DataOutputView out = new DataOutputViewStreamWrapper(by); - try { - ts.serialize(new Tuple2<>(element.f0, element.f1), out); - } catch (IOException e) { - throw new RuntimeException("Error", e); - } - return by.toByteArray(); - } - - @Override - public String getTargetTopic(Tuple3 element) { - return element.f2; - } - } - - private static class TestDeserializer extends AbstractTestDeserializer - implements KafkaSerializationSchema> { - - public TestDeserializer(ExecutionConfig ec) { - super(ec); - } - - @Override - public ProducerRecord serialize( - Tuple3 element, @Nullable Long timestamp) { - ByteArrayOutputStream by = new ByteArrayOutputStream(); - DataOutputView out = new DataOutputViewStreamWrapper(by); - try { - ts.serialize(new Tuple2<>(element.f0, element.f1), out); - } catch (IOException e) { - throw new RuntimeException("Error", e); - } - byte[] serializedValue = by.toByteArray(); - - return new ProducerRecord<>(element.f2, serializedValue); - } - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaITCase.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaITCase.java deleted file mode 100644 index 68db69187..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaITCase.java +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
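The test (de)serializers above push Tuple2 values through Flink's TypeSerializer and the DataOutputView/DataInputView stream wrappers before they reach Kafka. Stripped of the Kafka plumbing, that round trip can be sketched roughly as follows; the concrete tuple value is arbitrary.

    import org.apache.flink.api.common.ExecutionConfig;
    import org.apache.flink.api.common.typeinfo.TypeHint;
    import org.apache.flink.api.common.typeinfo.TypeInformation;
    import org.apache.flink.api.common.typeutils.TypeSerializer;
    import org.apache.flink.api.java.tuple.Tuple2;
    import org.apache.flink.core.memory.DataInputViewStreamWrapper;
    import org.apache.flink.core.memory.DataOutputViewStreamWrapper;

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;

    public class TupleRoundTripSketch {
        public static void main(String[] args) throws Exception {
            TypeSerializer<Tuple2<Integer, Integer>> serializer =
                    TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {})
                            .createSerializer(new ExecutionConfig());

            // Serialize a tuple to bytes, as serializeValue(...) does before producing to Kafka.
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            serializer.serialize(new Tuple2<>(7, 42), new DataOutputViewStreamWrapper(bytes));

            // Deserialize the bytes again, as deserialize(ConsumerRecord) does on the read side.
            Tuple2<Integer, Integer> copy =
                    serializer.deserialize(
                            new DataInputViewStreamWrapper(
                                    new ByteArrayInputStream(bytes.toByteArray())));
            System.out.println(copy); // (7,42)
        }
    }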
- */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.api.common.ExecutionConfig; -import org.apache.flink.api.common.restartstrategy.RestartStrategies; -import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.common.typeinfo.Types; -import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.api.java.typeutils.GenericTypeInfo; -import org.apache.flink.core.memory.DataInputView; -import org.apache.flink.core.memory.DataInputViewStreamWrapper; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; -import org.apache.flink.streaming.api.functions.sink.SinkFunction; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.api.operators.StreamSink; -import org.apache.flink.streaming.api.watermark.Watermark; -import org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; - -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.junit.BeforeClass; -import org.junit.Test; - -import javax.annotation.Nullable; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.util.Optional; -import java.util.UUID; - -/** IT cases for Kafka. */ -public class KafkaITCase extends KafkaConsumerTestBase { - - @BeforeClass - public static void prepare() throws Exception { - KafkaProducerTestBase.prepare(); - ((KafkaTestEnvironmentImpl) kafkaServer) - .setProducerSemantic(FlinkKafkaProducer.Semantic.AT_LEAST_ONCE); - } - - // ------------------------------------------------------------------------ - // Suite of Tests - // ------------------------------------------------------------------------ - - @Test(timeout = 120000) - public void testFailOnNoBroker() throws Exception { - runFailOnNoBrokerTest(); - } - - @Test(timeout = 60000) - public void testConcurrentProducerConsumerTopology() throws Exception { - runSimpleConcurrentProducerConsumerTopology(); - } - - @Test(timeout = 60000) - public void testKeyValueSupport() throws Exception { - runKeyValueTest(); - } - - // --- canceling / failures --- - - @Test(timeout = 60000) - public void testCancelingEmptyTopic() throws Exception { - runCancelingOnEmptyInputTest(); - } - - @Test(timeout = 60000) - public void testCancelingFullTopic() throws Exception { - runCancelingOnFullInputTest(); - } - - // --- source to partition mappings and exactly once --- - - @Test(timeout = 60000) - public void testOneToOneSources() throws Exception { - runOneToOneExactlyOnceTest(); - } - - @Test(timeout = 60000) - public void testOneSourceMultiplePartitions() throws Exception { - runOneSourceMultiplePartitionsExactlyOnceTest(); - } - - @Test(timeout = 60000) - public void testMultipleSourcesOnePartition() throws Exception { - runMultipleSourcesOnePartitionExactlyOnceTest(); - } - - // --- broker failure --- - - @Test(timeout = 60000) - public void testBrokerFailure() throws Exception { - runBrokerFailureTest(); - } - - // --- special executions --- - - @Test(timeout = 60000) - public void testBigRecordJob() throws Exception { - runBigRecordTestTopology(); - } - - 
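prepare() in the deleted KafkaITCase pins the legacy FlinkKafkaProducer to Semantic.AT_LEAST_ONCE. With the KafkaSink that the connector keeps, the equivalent knob is DeliveryGuarantee; a minimal sketch, with placeholder broker address and topic, could look like this.

    import org.apache.flink.api.common.serialization.SimpleStringSchema;
    import org.apache.flink.connector.base.DeliveryGuarantee;
    import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
    import org.apache.flink.connector.kafka.sink.KafkaSink;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

    public class AtLeastOnceSinkSketch {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

            KafkaSink<String> sink =
                    KafkaSink.<String>builder()
                            .setBootstrapServers("localhost:9092") // placeholder broker address
                            .setRecordSerializer(
                                    KafkaRecordSerializationSchema.builder()
                                            .setTopic("output-topic") // placeholder topic
                                            .setValueSerializationSchema(new SimpleStringSchema())
                                            .build())
                            // AT_LEAST_ONCE flushes on checkpoints; EXACTLY_ONCE would additionally
                            // require setTransactionalIdPrefix(...) for Kafka transactions.
                            .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
                            .build();

            env.fromElements("a", "b", "c").sinkTo(sink);
            env.execute("at-least-once sink sketch");
        }
    }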
@Test(timeout = 60000) - public void testMultipleTopicsWithLegacySerializer() throws Exception { - runProduceConsumeMultipleTopics(true); - } - - @Test(timeout = 60000) - public void testMultipleTopicsWithKafkaSerializer() throws Exception { - runProduceConsumeMultipleTopics(false); - } - - @Test(timeout = 60000) - public void testAllDeletes() throws Exception { - runAllDeletesTest(); - } - - @Test(timeout = 60000) - public void testMetricsAndEndOfStream() throws Exception { - runEndOfStreamTest(); - } - - // --- startup mode --- - - @Test(timeout = 60000) - public void testStartFromEarliestOffsets() throws Exception { - runStartFromEarliestOffsets(); - } - - @Test(timeout = 60000) - public void testStartFromLatestOffsets() throws Exception { - runStartFromLatestOffsets(); - } - - @Test(timeout = 60000) - public void testStartFromGroupOffsets() throws Exception { - runStartFromGroupOffsets(); - } - - @Test(timeout = 60000) - public void testStartFromSpecificOffsets() throws Exception { - runStartFromSpecificOffsets(); - } - - @Test(timeout = 60000) - public void testStartFromTimestamp() throws Exception { - runStartFromTimestamp(); - } - - // --- offset committing --- - - @Test(timeout = 60000) - public void testCommitOffsetsToKafka() throws Exception { - runCommitOffsetsToKafka(); - } - - @Test(timeout = 60000) - public void testAutoOffsetRetrievalAndCommitToKafka() throws Exception { - runAutoOffsetRetrievalAndCommitToKafka(); - } - - @Test(timeout = 60000) - public void testCollectingSchema() throws Exception { - runCollectingSchemaTest(); - } - - /** Kafka 20 specific test, ensuring Timestamps are properly written to and read from Kafka. */ - @Test(timeout = 60000) - public void testTimestamps() throws Exception { - - final String topic = "tstopic-" + UUID.randomUUID(); - createTestTopic(topic, 3, 1); - - // ---------- Produce an event time stream into Kafka ------------------- - - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); - - DataStream streamWithTimestamps = - env.addSource( - new SourceFunction() { - private static final long serialVersionUID = -2255115836471289626L; - boolean running = true; - - @Override - public void run(SourceContext ctx) throws Exception { - long i = 0; - while (running) { - ctx.collectWithTimestamp(i, i * 2); - if (i++ == 1110L) { - running = false; - } - } - } - - @Override - public void cancel() { - running = false; - } - }); - - final TypeInformationSerializationSchema longSer = - new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig()); - FlinkKafkaProducer prod = - new FlinkKafkaProducer<>( - topic, - new KeyedSerializationSchemaWrapper<>(longSer), - standardProps, - Optional.of( - new FlinkKafkaPartitioner() { - private static final long serialVersionUID = - -6730989584364230617L; - - @Override - public int partition( - Long next, - byte[] key, - byte[] value, - String targetTopic, - int[] partitions) { - return (int) (next % 3); - } - })); - prod.setWriteTimestampToKafka(true); - - streamWithTimestamps.addSink(prod).setParallelism(3); - - env.execute("Produce some"); - - // ---------- Consume stream from Kafka ------------------- - - env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); - - FlinkKafkaConsumer kafkaSource = - new FlinkKafkaConsumer<>( - topic, new KafkaITCase.LimitedLongDeserializer(), 
standardProps); - kafkaSource.assignTimestampsAndWatermarks( - new AssignerWithPunctuatedWatermarks() { - private static final long serialVersionUID = -4834111173247835189L; - - @Nullable - @Override - public Watermark checkAndGetNextWatermark( - Long lastElement, long extractedTimestamp) { - if (lastElement % 11 == 0) { - return new Watermark(lastElement); - } - return null; - } - - @Override - public long extractTimestamp(Long element, long previousElementTimestamp) { - return previousElementTimestamp; - } - }); - - DataStream stream = env.addSource(kafkaSource); - GenericTypeInfo objectTypeInfo = new GenericTypeInfo<>(Object.class); - stream.transform( - "timestamp validating operator", - objectTypeInfo, - new TimestampValidatingOperator()) - .setParallelism(1); - - env.execute("Consume again"); - - deleteTestTopic(topic); - } - - private static class TimestampValidatingOperator extends StreamSink { - - private static final long serialVersionUID = 1353168781235526806L; - - public TimestampValidatingOperator() { - super( - new SinkFunction() { - private static final long serialVersionUID = -6676565693361786524L; - - @Override - public void invoke(Long value) throws Exception { - throw new RuntimeException("Unexpected"); - } - }); - } - - long elCount = 0; - long wmCount = 0; - long lastWM = Long.MIN_VALUE; - - @Override - public void processElement(StreamRecord element) throws Exception { - elCount++; - if (element.getValue() * 2 != element.getTimestamp()) { - throw new RuntimeException("Invalid timestamp: " + element); - } - } - - @Override - public void processWatermark(Watermark mark) throws Exception { - wmCount++; - - if (lastWM <= mark.getTimestamp()) { - lastWM = mark.getTimestamp(); - } else { - throw new RuntimeException("Received watermark higher than the last one"); - } - - if (mark.getTimestamp() % 11 != 0 && mark.getTimestamp() != Long.MAX_VALUE) { - throw new RuntimeException("Invalid watermark: " + mark.getTimestamp()); - } - } - - @Override - public void close() throws Exception { - super.close(); - if (elCount != 1110L) { - throw new RuntimeException("Wrong final element count " + elCount); - } - - if (wmCount <= 2) { - throw new RuntimeException("Almost no watermarks have been sent " + wmCount); - } - } - } - - private static class LimitedLongDeserializer implements KafkaDeserializationSchema { - - private static final long serialVersionUID = 6966177118923713521L; - private final TypeInformation ti; - private final TypeSerializer ser; - long cnt = 0; - - public LimitedLongDeserializer() { - this.ti = Types.LONG; - this.ser = ti.createSerializer(new ExecutionConfig()); - } - - @Override - public TypeInformation getProducedType() { - return ti; - } - - @Override - public Long deserialize(ConsumerRecord record) throws IOException { - cnt++; - DataInputView in = - new DataInputViewStreamWrapper(new ByteArrayInputStream(record.value())); - Long e = ser.deserialize(in); - return e; - } - - @Override - public boolean isEndOfStream(Long nextElement) { - return cnt > 1110L; - } - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaMigrationTestBase.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaMigrationTestBase.java deleted file mode 100644 index b08bb05e3..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaMigrationTestBase.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more 
- * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.FlinkVersion; -import org.apache.flink.api.common.ExecutionConfig; -import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema; -import org.apache.flink.api.common.typeinfo.BasicTypeInfo; -import org.apache.flink.runtime.checkpoint.OperatorSubtaskState; -import org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper; -import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness; -import org.apache.flink.streaming.util.OperatorSnapshotUtil; -import org.apache.flink.streaming.util.serialization.KeyedSerializationSchema; - -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Arrays; -import java.util.Optional; -import java.util.Properties; - -import static org.apache.flink.util.Preconditions.checkNotNull; -import static org.apache.flink.util.Preconditions.checkState; - -/** The base class with migration tests for the Kafka Exactly-Once Producer. */ -@SuppressWarnings("serial") -public abstract class KafkaMigrationTestBase extends KafkaTestBase { - - protected static final Logger LOG = LoggerFactory.getLogger(KafkaMigrationTestBase.class); - protected static final String TOPIC = "flink-kafka-producer-migration-test"; - - protected final FlinkVersion testMigrateVersion; - protected final TypeInformationSerializationSchema integerSerializationSchema = - new TypeInformationSerializationSchema<>( - BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig()); - protected final KeyedSerializationSchema integerKeyedSerializationSchema = - new KeyedSerializationSchemaWrapper<>(integerSerializationSchema); - - /** - * TODO change this to the corresponding savepoint version to be written (e.g. {@link - * FlinkVersion#v1_3} for 1.3) TODO and remove all @Ignore annotations on write*Snapshot() - * methods to generate savepoints TODO Note: You should generate the savepoint based on the - * release branch instead of the master. - */ - protected final Optional flinkGenerateSavepointVersion = Optional.empty(); - - public KafkaMigrationTestBase(FlinkVersion testMigrateVersion) { - this.testMigrateVersion = checkNotNull(testMigrateVersion); - } - - public String getOperatorSnapshotPath() { - return getOperatorSnapshotPath(testMigrateVersion); - } - - public String getOperatorSnapshotPath(FlinkVersion version) { - return "src/test/resources/kafka-migration-kafka-producer-flink-" + version + "-snapshot"; - } - - /** - * Override {@link KafkaTestBase}. 
Kafka Migration Tests are starting up Kafka/ZooKeeper cluster - * manually - */ - @BeforeClass - public static void prepare() throws Exception {} - - /** - * Override {@link KafkaTestBase}. Kafka Migration Tests are starting up Kafka/ZooKeeper cluster - * manually - */ - @AfterClass - public static void shutDownServices() throws Exception {} - - /** Manually run this to write binary snapshot data. */ - @Ignore - @Test - public void writeSnapshot() throws Exception { - try { - checkState(flinkGenerateSavepointVersion.isPresent()); - startClusters(); - - OperatorSubtaskState snapshot = initializeTestState(); - OperatorSnapshotUtil.writeStateHandle( - snapshot, getOperatorSnapshotPath(flinkGenerateSavepointVersion.get())); - } finally { - shutdownClusters(); - } - } - - private OperatorSubtaskState initializeTestState() throws Exception { - try (OneInputStreamOperatorTestHarness testHarness = createTestHarness()) { - testHarness.setup(); - testHarness.open(); - - // Create a committed transaction - testHarness.processElement(42, 0L); - - // TODO: when stop with savepoint is available, replace this code with it (with stop - // with savepoint - // there won't be any pending transactions) - OperatorSubtaskState snapshot = testHarness.snapshot(0L, 1L); - // We kind of simulate stop with savepoint by making sure that - // notifyOfCompletedCheckpoint is called - testHarness.notifyOfCompletedCheckpoint(0L); - - // Create a Pending transaction - testHarness.processElement(43, 2L); - return snapshot; - } - } - - @SuppressWarnings("warning") - @Test - public void testRestoreProducer() throws Exception { - try { - startClusters(); - - initializeTestState(); - - try (OneInputStreamOperatorTestHarness testHarness = createTestHarness()) { - initializeState(testHarness); - - // Create a committed transaction - testHarness.processElement(44, 4L); - testHarness.snapshot(2L, 5L); - testHarness.notifyOfCompletedCheckpoint(2L); - - // Create a pending transaction - testHarness.processElement(45, 6L); - - // We should have: - // - committed transaction 42 - // - transaction 43 aborted - // - committed transaction 44 - // - transaction 45 pending - assertExactlyOnceForTopic(createProperties(), TOPIC, Arrays.asList(42, 44)); - } - } finally { - shutdownClusters(); - } - } - - protected abstract OneInputStreamOperatorTestHarness createTestHarness() - throws Exception; - - protected abstract Properties createProperties(); - - protected void initializeState(OneInputStreamOperatorTestHarness testHarness) throws Exception { - testHarness.setup(); - testHarness.initializeState(getOperatorSnapshotPath()); - testHarness.open(); - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaProducerAtLeastOnceITCase.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaProducerAtLeastOnceITCase.java deleted file mode 100644 index aae2680bd..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaProducerAtLeastOnceITCase.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.junit.BeforeClass; - -/** IT cases for the {@link FlinkKafkaProducer}. */ -@SuppressWarnings("serial") -public class KafkaProducerAtLeastOnceITCase extends KafkaProducerTestBase { - - @BeforeClass - public static void prepare() throws Exception { - KafkaProducerTestBase.prepare(); - ((KafkaTestEnvironmentImpl) kafkaServer) - .setProducerSemantic(FlinkKafkaProducer.Semantic.AT_LEAST_ONCE); - } - - @Override - public void testExactlyOnceRegularSink() throws Exception { - // disable test for at least once semantic - } - - @Override - public void testExactlyOnceCustomOperator() throws Exception { - // disable test for at least once semantic - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaProducerExactlyOnceITCase.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaProducerExactlyOnceITCase.java deleted file mode 100644 index f8b20ae02..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaProducerExactlyOnceITCase.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.junit.BeforeClass; -import org.junit.Test; - -/** IT cases for the {@link FlinkKafkaProducer}. */ -@SuppressWarnings("serial") -public class KafkaProducerExactlyOnceITCase extends KafkaProducerTestBase { - @BeforeClass - public static void prepare() throws Exception { - KafkaProducerTestBase.prepare(); - ((KafkaTestEnvironmentImpl) kafkaServer) - .setProducerSemantic(FlinkKafkaProducer.Semantic.EXACTLY_ONCE); - } - - @Test - public void testMultipleSinkOperators() throws Exception { - testExactlyOnce(false, 2); - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaProducerTestBase.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaProducerTestBase.java deleted file mode 100644 index cf3bf463f..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaProducerTestBase.java +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.api.common.ExecutionConfig; -import org.apache.flink.api.common.functions.RichMapFunction; -import org.apache.flink.api.common.restartstrategy.RestartStrategies; -import org.apache.flink.api.common.serialization.SerializationSchema; -import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema; -import org.apache.flink.api.common.typeinfo.BasicTypeInfo; -import org.apache.flink.api.common.typeinfo.TypeHint; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.java.tuple.Tuple2; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.streaming.api.functions.sink.SinkFunction; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.api.operators.StreamSink; -import org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; -import org.apache.flink.streaming.connectors.kafka.testutils.FailingIdentityMapper; -import org.apache.flink.streaming.connectors.kafka.testutils.IntegerSource; -import org.apache.flink.streaming.connectors.kafka.testutils.KafkaUtils; -import org.apache.flink.test.util.SuccessException; -import org.apache.flink.test.util.TestUtils; -import org.apache.flink.util.Preconditions; - -import org.junit.Test; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.UUID; - -import static org.apache.flink.test.util.TestUtils.tryExecute; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.fail; - -/** Abstract test base for all Kafka producer tests. */ -@SuppressWarnings("serial") -public abstract class KafkaProducerTestBase extends KafkaTestBaseWithFlink { - - /** - * This tests verifies that custom partitioning works correctly, with a default topic and - * dynamic topic. The number of partitions for each topic is deliberately different. - * - *

    -     * <p>Test topology:
    -     *
    -     * <pre>
    -     *             +------> (sink) --+--> [DEFAULT_TOPIC-1] --> (source) -> (map) -----+
    -     *            /                  |                             |          |        |
    -     *           |                   |                             |          |  ------+--> (sink)
    -     *             +------> (sink) --+--> [DEFAULT_TOPIC-2] --> (source) -> (map) -----+
    -     *            /                  |
    -     *           |                   |
    -     * (source) ----------> (sink) --+--> [DYNAMIC_TOPIC-1] --> (source) -> (map) -----+
    -     *           |                   |                             |          |        |
    -     *            \                  |                             |          |        |
    -     *             +------> (sink) --+--> [DYNAMIC_TOPIC-2] --> (source) -> (map) -----+--> (sink)
    -     *           |                   |                             |          |        |
    -     *            \                  |                             |          |        |
    -     *             +------> (sink) --+--> [DYNAMIC_TOPIC-3] --> (source) -> (map) -----+
    -     * </pre>
    -     *
    -     * <p>Each topic has an independent mapper that validates the values come consistently from the
    -     * correct Kafka partition of the topic it is responsible for.
    -     *
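The topology above routes every record to a target topic (default or dynamic) and, inside that topic, to a specific partition. With the current KafkaSink API the same routing decision can be expressed through a KafkaRecordSerializationSchema; the sketch below is illustrative only, with placeholder topic names and a simplified text serializer standing in for the custom schema and partitioner defined later in this file.

    import org.apache.flink.api.common.serialization.SerializationSchema;
    import org.apache.flink.api.java.tuple.Tuple2;
    import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
    import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner;

    import java.nio.charset.StandardCharsets;

    public class TopicRoutingSketch {

        /** Serializes the tuple as "key,value" text; purely illustrative. */
        public static class TupleAsTextSchema implements SerializationSchema<Tuple2<Long, String>> {
            @Override
            public byte[] serialize(Tuple2<Long, String> element) {
                return (element.f0 + "," + element.f1).getBytes(StandardCharsets.UTF_8);
            }
        }

        /** Picks the partition from the tuple key, similar to the CustomPartitioner below. */
        public static class ModuloPartitioner extends FlinkKafkaPartitioner<Tuple2<Long, String>> {
            @Override
            public int partition(
                    Tuple2<Long, String> record,
                    byte[] key,
                    byte[] value,
                    String targetTopic,
                    int[] partitions) {
                return (int) (record.f0 % partitions.length);
            }
        }

        /** Routes even keys to one topic and odd keys to the other, mirroring getTargetTopic(). */
        public static KafkaRecordSerializationSchema<Tuple2<Long, String>> routingSchema() {
            return KafkaRecordSerializationSchema.<Tuple2<Long, String>>builder()
                    .setTopicSelector(
                            (Tuple2<Long, String> t) ->
                                    t.f0 % 2 == 0 ? "default-topic" : "dynamic-topic")
                    .setValueSerializationSchema(new TupleAsTextSchema())
                    .setPartitioner(new ModuloPartitioner())
                    .build();
        }
    }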

    Each topic also has a final sink that validates that there are no duplicates and that all - * partitions are present. - */ - @Test - public void testCustomPartitioning() { - try { - LOG.info("Starting KafkaProducerITCase.testCustomPartitioning()"); - - final String defaultTopic = "defaultTopic-" + UUID.randomUUID(); - final int defaultTopicPartitions = 2; - - final String dynamicTopic = "dynamicTopic-" + UUID.randomUUID(); - final int dynamicTopicPartitions = 3; - - createTestTopic(defaultTopic, defaultTopicPartitions, 1); - createTestTopic(dynamicTopic, dynamicTopicPartitions, 1); - - Map expectedTopicsToNumPartitions = new HashMap<>(2); - expectedTopicsToNumPartitions.put(defaultTopic, defaultTopicPartitions); - expectedTopicsToNumPartitions.put(dynamicTopic, dynamicTopicPartitions); - - TypeInformation> longStringInfo = - TypeInformation.of(new TypeHint>() {}); - - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setRestartStrategy(RestartStrategies.noRestart()); - - TypeInformationSerializationSchema> serSchema = - new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig()); - - TypeInformationSerializationSchema> deserSchema = - new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig()); - - // ------ producing topology --------- - - // source has DOP 1 to make sure it generates no duplicates - DataStream> stream = - env.addSource( - new SourceFunction>() { - - private boolean running = true; - - @Override - public void run(SourceContext> ctx) - throws Exception { - long cnt = 0; - while (running) { - ctx.collect( - new Tuple2( - cnt, "kafka-" + cnt)); - cnt++; - if (cnt % 100 == 0) { - Thread.sleep(1); - } - } - } - - @Override - public void cancel() { - running = false; - } - }) - .setParallelism(1); - - Properties props = new Properties(); - props.putAll(KafkaUtils.getPropertiesFromBrokerList(brokerConnectionStrings)); - props.putAll(secureProps); - - // sink partitions into - kafkaServer - .produceIntoKafka( - stream, - defaultTopic, - // this serialization schema will route between the default topic and - // dynamic topic - new CustomKeyedSerializationSchemaWrapper( - serSchema, defaultTopic, dynamicTopic), - props, - new CustomPartitioner(expectedTopicsToNumPartitions)) - .setParallelism(Math.max(defaultTopicPartitions, dynamicTopicPartitions)); - - // ------ consuming topology --------- - - Properties consumerProps = new Properties(); - consumerProps.putAll(standardProps); - consumerProps.putAll(secureProps); - - FlinkKafkaConsumerBase> defaultTopicSource = - kafkaServer.getConsumer(defaultTopic, deserSchema, consumerProps); - FlinkKafkaConsumerBase> dynamicTopicSource = - kafkaServer.getConsumer(dynamicTopic, deserSchema, consumerProps); - - env.addSource(defaultTopicSource) - .setParallelism(defaultTopicPartitions) - .map(new PartitionValidatingMapper(defaultTopicPartitions)) - .setParallelism(defaultTopicPartitions) - .addSink(new PartitionValidatingSink(defaultTopicPartitions)) - .setParallelism(1); - - env.addSource(dynamicTopicSource) - .setParallelism(dynamicTopicPartitions) - .map(new PartitionValidatingMapper(dynamicTopicPartitions)) - .setParallelism(dynamicTopicPartitions) - .addSink(new PartitionValidatingSink(dynamicTopicPartitions)) - .setParallelism(1); - - tryExecute(env, "custom partitioning test"); - - deleteTestTopic(defaultTopic); - deleteTestTopic(dynamicTopic); - - LOG.info("Finished KafkaProducerITCase.testCustomPartitioning()"); - } catch (Exception e) { - e.printStackTrace(); - 
fail(e.getMessage()); - } - } - - /** Tests the exactly-once semantic for the simple writes into Kafka. */ - @Test - public void testExactlyOnceRegularSink() throws Exception { - testExactlyOnce(true, 1); - } - - /** Tests the exactly-once semantic for the simple writes into Kafka. */ - @Test - public void testExactlyOnceCustomOperator() throws Exception { - testExactlyOnce(false, 1); - } - - /** - * This test sets KafkaProducer so that it will automatically flush the data and and fails the - * broker to check whether flushed records since last checkpoint were not duplicated. - */ - protected void testExactlyOnce(boolean regularSink, int sinksCount) throws Exception { - final String topicNamePrefix = - (regularSink ? "exactlyOnceTopicRegularSink" : "exactlyTopicCustomOperator") - + sinksCount; - final int partition = 0; - final int numElements = 1000; - final int failAfterElements = 333; - - final List topics = new ArrayList<>(); - for (int i = 0; i < sinksCount; i++) { - final String topic = topicNamePrefix + i + "-" + UUID.randomUUID(); - topics.add(topic); - createTestTopic(topic, 1, 1); - } - - TypeInformationSerializationSchema schema = - new TypeInformationSerializationSchema<>( - BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig()); - - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.enableCheckpointing(500); - env.setParallelism(1); - env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0)); - - Properties properties = new Properties(); - properties.putAll(standardProps); - properties.putAll(secureProps); - - // process exactly failAfterElements number of elements and then shutdown Kafka broker and - // fail application - List expectedElements = getIntegersSequence(numElements); - - DataStream inputStream = - env.addSource(new IntegerSource(numElements)) - .map(new FailingIdentityMapper(failAfterElements)); - - for (int i = 0; i < sinksCount; i++) { - FlinkKafkaPartitioner partitioner = - new FlinkKafkaPartitioner() { - @Override - public int partition( - Integer record, - byte[] key, - byte[] value, - String targetTopic, - int[] partitions) { - return partition; - } - }; - - if (regularSink) { - StreamSink kafkaSink = - kafkaServer.getProducerSink(topics.get(i), schema, properties, partitioner); - inputStream.addSink(kafkaSink.getUserFunction()); - } else { - kafkaServer.produceIntoKafka( - inputStream, topics.get(i), schema, properties, partitioner); - } - } - - FailingIdentityMapper.failedBefore = false; - TestUtils.tryExecute(env, "Exactly once test"); - - for (int i = 0; i < sinksCount; i++) { - // assert that before failure we successfully snapshot/flushed all expected elements - assertExactlyOnceForTopic(properties, topics.get(i), expectedElements); - deleteTestTopic(topics.get(i)); - } - } - - private List getIntegersSequence(int size) { - List result = new ArrayList<>(size); - for (int i = 0; i < size; i++) { - result.add(i); - } - return result; - } - - // ------------------------------------------------------------------------ - - private static class CustomPartitioner extends FlinkKafkaPartitioner> - implements Serializable { - - private final Map expectedTopicsToNumPartitions; - - public CustomPartitioner(Map expectedTopicsToNumPartitions) { - this.expectedTopicsToNumPartitions = expectedTopicsToNumPartitions; - } - - @Override - public int partition( - Tuple2 next, - byte[] serializedKey, - byte[] serializedValue, - String topic, - int[] partitions) { - 
assertThat(partitions).hasSize(expectedTopicsToNumPartitions.get(topic).intValue()); - - return (int) (next.f0 % partitions.length); - } - } - - /** - * A {@link KeyedSerializationSchemaWrapper} that supports routing serialized records to - * different target topics. - */ - public static class CustomKeyedSerializationSchemaWrapper - extends KeyedSerializationSchemaWrapper> { - - private final String defaultTopic; - private final String dynamicTopic; - - public CustomKeyedSerializationSchemaWrapper( - SerializationSchema> serializationSchema, - String defaultTopic, - String dynamicTopic) { - - super(serializationSchema); - - this.defaultTopic = Preconditions.checkNotNull(defaultTopic); - this.dynamicTopic = Preconditions.checkNotNull(dynamicTopic); - } - - @Override - public String getTargetTopic(Tuple2 element) { - return (element.f0 % 2 == 0) ? defaultTopic : dynamicTopic; - } - } - - /** Mapper that validates partitioning and maps to partition. */ - public static class PartitionValidatingMapper - extends RichMapFunction, Integer> { - - private final int numPartitions; - - private int ourPartition = -1; - - public PartitionValidatingMapper(int numPartitions) { - this.numPartitions = numPartitions; - } - - @Override - public Integer map(Tuple2 value) throws Exception { - int partition = value.f0.intValue() % numPartitions; - if (ourPartition != -1) { - assertThat(partition).as("inconsistent partitioning").isEqualTo(ourPartition); - } else { - ourPartition = partition; - } - return partition; - } - } - - /** - * Sink that validates records received from each partition and checks that there are no - * duplicates. - */ - public static class PartitionValidatingSink implements SinkFunction { - private final int[] valuesPerPartition; - - public PartitionValidatingSink(int numPartitions) { - this.valuesPerPartition = new int[numPartitions]; - } - - @Override - public void invoke(Integer value) throws Exception { - valuesPerPartition[value]++; - - boolean missing = false; - for (int i : valuesPerPartition) { - if (i < 100) { - missing = true; - break; - } - } - if (!missing) { - throw new SuccessException(); - } - } - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaSerializerUpgradeTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaSerializerUpgradeTest.java deleted file mode 100644 index 90ce2e5eb..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaSerializerUpgradeTest.java +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.FlinkVersion; -import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.api.common.typeutils.TypeSerializerSchemaCompatibility; -import org.apache.flink.streaming.connectors.kafka.internals.FlinkKafkaInternalProducer; -import org.apache.flink.streaming.connectors.kafka.testutils.TypeSerializerMatchers; -import org.apache.flink.streaming.connectors.kafka.testutils.TypeSerializerUpgradeTestBase; - -import org.hamcrest.Matcher; -import org.mockito.Mockito; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashSet; -import java.util.Set; - -import static org.hamcrest.Matchers.is; - -/** - * A {@link TypeSerializerUpgradeTestBase} for {@link FlinkKafkaProducer.TransactionStateSerializer} - * and {@link FlinkKafkaProducer.ContextStateSerializer}. - */ -class KafkaSerializerUpgradeTest extends TypeSerializerUpgradeTestBase { - - public Collection> createTestSpecifications() throws Exception { - - ArrayList> testSpecifications = new ArrayList<>(); - for (FlinkVersion flinkVersion : MIGRATION_VERSIONS) { - testSpecifications.add( - new TestSpecification<>( - "transaction-state-serializer", - flinkVersion, - TransactionStateSerializerSetup.class, - TransactionStateSerializerVerifier.class)); - testSpecifications.add( - new TestSpecification<>( - "context-state-serializer", - flinkVersion, - ContextStateSerializerSetup.class, - ContextStateSerializerVerifier.class)); - } - return testSpecifications; - } - - // ---------------------------------------------------------------------------------------------- - // Specification for "transaction-state-serializer" - // ---------------------------------------------------------------------------------------------- - - /** - * This class is only public to work with {@link - * org.apache.flink.api.common.typeutils.ClassRelocator}. - */ - public static final class TransactionStateSerializerSetup - implements TypeSerializerUpgradeTestBase.PreUpgradeSetup< - FlinkKafkaProducer.KafkaTransactionState> { - @Override - public TypeSerializer createPriorSerializer() { - return new FlinkKafkaProducer.TransactionStateSerializer(); - } - - @Override - public FlinkKafkaProducer.KafkaTransactionState createTestData() { - @SuppressWarnings("unchecked") - FlinkKafkaInternalProducer mock = - Mockito.mock(FlinkKafkaInternalProducer.class); - return new FlinkKafkaProducer.KafkaTransactionState("1234", 3456, (short) 789, mock); - } - } - - /** - * This class is only public to work with {@link - * org.apache.flink.api.common.typeutils.ClassRelocator}. 
- */ - public static final class TransactionStateSerializerVerifier - implements TypeSerializerUpgradeTestBase.UpgradeVerifier< - FlinkKafkaProducer.KafkaTransactionState> { - @Override - public TypeSerializer createUpgradedSerializer() { - return new FlinkKafkaProducer.TransactionStateSerializer(); - } - - @Override - public Matcher testDataMatcher() { - @SuppressWarnings("unchecked") - FlinkKafkaInternalProducer mock = - Mockito.mock(FlinkKafkaInternalProducer.class); - return is( - new FlinkKafkaProducer.KafkaTransactionState("1234", 3456, (short) 789, mock)); - } - - @Override - public Matcher> - schemaCompatibilityMatcher(FlinkVersion version) { - return TypeSerializerMatchers.isCompatibleAsIs(); - } - } - - // ---------------------------------------------------------------------------------------------- - // Specification for "context-state-serializer" - // ---------------------------------------------------------------------------------------------- - - /** - * This class is only public to work with {@link - * org.apache.flink.api.common.typeutils.ClassRelocator}. - */ - public static final class ContextStateSerializerSetup - implements TypeSerializerUpgradeTestBase.PreUpgradeSetup< - FlinkKafkaProducer.KafkaTransactionContext> { - @Override - public TypeSerializer createPriorSerializer() { - return new FlinkKafkaProducer.ContextStateSerializer(); - } - - @Override - public FlinkKafkaProducer.KafkaTransactionContext createTestData() { - Set transactionIds = new HashSet<>(); - transactionIds.add("123"); - transactionIds.add("456"); - transactionIds.add("789"); - return new FlinkKafkaProducer.KafkaTransactionContext(transactionIds); - } - } - - /** - * This class is only public to work with {@link - * org.apache.flink.api.common.typeutils.ClassRelocator}. 
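The upgrade verifiers in this deleted test ultimately assert TypeSerializerMatchers.isCompatibleAsIs(). The underlying compatibility check, outside of any test harness, is roughly the following sketch; StringSerializer merely stands in for the Kafka transaction-state serializers.

    import org.apache.flink.api.common.typeutils.TypeSerializer;
    import org.apache.flink.api.common.typeutils.TypeSerializerSchemaCompatibility;
    import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
    import org.apache.flink.api.common.typeutils.base.StringSerializer;

    public class CompatibilityCheckSketch {
        public static void main(String[] args) {
            // Snapshot the configuration of the "old" serializer, as a savepoint would.
            TypeSerializer<String> oldSerializer = StringSerializer.INSTANCE;
            TypeSerializerSnapshot<String> snapshot = oldSerializer.snapshotConfiguration();

            // Ask the snapshot whether the "new" serializer can restore that state.
            TypeSerializerSchemaCompatibility<String> compatibility =
                    snapshot.resolveSchemaCompatibility(StringSerializer.INSTANCE);

            System.out.println(compatibility.isCompatibleAsIs()); // true: same serializer
        }
    }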
- */ - public static final class ContextStateSerializerVerifier - implements TypeSerializerUpgradeTestBase.UpgradeVerifier< - FlinkKafkaProducer.KafkaTransactionContext> { - @Override - public TypeSerializer - createUpgradedSerializer() { - return new FlinkKafkaProducer.ContextStateSerializer(); - } - - @Override - public Matcher testDataMatcher() { - Set transactionIds = new HashSet<>(); - transactionIds.add("123"); - transactionIds.add("456"); - transactionIds.add("789"); - return is(new FlinkKafkaProducer.KafkaTransactionContext(transactionIds)); - } - - @Override - public Matcher< - TypeSerializerSchemaCompatibility< - FlinkKafkaProducer.KafkaTransactionContext>> - schemaCompatibilityMatcher(FlinkVersion version) { - return TypeSerializerMatchers.isCompatibleAsIs(); - } - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaShortRetentionTestBase.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaShortRetentionTestBase.java index 9fb16d40c..41dd0d1e5 100644 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaShortRetentionTestBase.java +++ b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaShortRetentionTestBase.java @@ -17,19 +17,12 @@ package org.apache.flink.streaming.connectors.kafka; -import org.apache.flink.api.common.restartstrategy.RestartStrategies; -import org.apache.flink.api.common.serialization.SimpleStringSchema; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.common.typeinfo.Types; import org.apache.flink.configuration.Configuration; import org.apache.flink.configuration.MemorySize; import org.apache.flink.configuration.TaskManagerOptions; import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.datastream.DataStreamSource; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.streaming.api.functions.sink.DiscardingSink; -import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; import org.apache.flink.test.util.MiniClusterWithClientResource; import org.apache.flink.util.InstantiationUtil; @@ -43,9 +36,6 @@ import java.io.Serializable; import java.util.Properties; -import java.util.UUID; - -import static org.apache.flink.test.util.TestUtils.tryExecute; /** * A class containing a special Kafka broker which has a log retention of only 250 ms. 
This way, we @@ -128,67 +118,6 @@ public static void shutDownServices() throws Exception { */ private static boolean stopProducer = false; - public void runAutoOffsetResetTest() throws Exception { - final String topic = "auto-offset-reset-test-" + UUID.randomUUID(); - - final int parallelism = 1; - final int elementsPerPartition = 50000; - - Properties tprops = new Properties(); - tprops.setProperty("retention.ms", "250"); - kafkaServer.createTestTopic(topic, parallelism, 1, tprops); - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(parallelism); - env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately - - // ----------- add producer dataflow ---------- - - DataStream stream = - env.addSource( - new RichParallelSourceFunction() { - - private boolean running = true; - - @Override - public void run(SourceContext ctx) throws InterruptedException { - int cnt = - getRuntimeContext().getIndexOfThisSubtask() - * elementsPerPartition; - int limit = cnt + elementsPerPartition; - - while (running && !stopProducer && cnt < limit) { - ctx.collect("element-" + cnt); - cnt++; - Thread.sleep(10); - } - LOG.info("Stopping producer"); - } - - @Override - public void cancel() { - running = false; - } - }); - Properties props = new Properties(); - props.putAll(standardProps); - props.putAll(secureProps); - kafkaServer.produceIntoKafka(stream, topic, new SimpleStringSchema(), props, null); - - // ----------- add consumer dataflow ---------- - - NonContinousOffsetsDeserializationSchema deserSchema = - new NonContinousOffsetsDeserializationSchema(); - FlinkKafkaConsumerBase source = kafkaServer.getConsumer(topic, deserSchema, props); - - DataStreamSource consuming = env.addSource(source); - consuming.addSink(new DiscardingSink()); - - tryExecute(env, "run auto offset reset test"); - - kafkaServer.deleteTestTopic(topic); - } - private class NonContinousOffsetsDeserializationSchema implements KafkaDeserializationSchema { private int numJumps; diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironment.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironment.java index 1494ff1f4..42932f5d8 100644 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironment.java +++ b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironment.java @@ -17,21 +17,9 @@ package org.apache.flink.streaming.connectors.kafka; -import org.apache.flink.api.common.serialization.DeserializationSchema; -import org.apache.flink.api.common.serialization.SerializationSchema; -import org.apache.flink.connector.kafka.source.KafkaSourceBuilder; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.datastream.DataStreamSink; -import org.apache.flink.streaming.api.operators.StreamSink; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaDeserializationSchemaWrapper; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; -import org.apache.flink.streaming.util.serialization.KeyedSerializationSchema; - import org.apache.kafka.clients.consumer.ConsumerRecord; import java.util.Collection; -import java.util.Collections; -import java.util.List; import java.util.Map; import java.util.Properties; import java.util.UUID; @@ -122,78 +110,9 @@ public Properties getTransactionalProducerConfig() { 
return props; } - // -- consumer / producer instances: - public FlinkKafkaConsumerBase getConsumer( - List topics, DeserializationSchema deserializationSchema, Properties props) { - return getConsumer( - topics, new KafkaDeserializationSchemaWrapper(deserializationSchema), props); - } - - public FlinkKafkaConsumerBase getConsumer( - String topic, KafkaDeserializationSchema readSchema, Properties props) { - return getConsumer(Collections.singletonList(topic), readSchema, props); - } - - public FlinkKafkaConsumerBase getConsumer( - String topic, DeserializationSchema deserializationSchema, Properties props) { - return getConsumer(Collections.singletonList(topic), deserializationSchema, props); - } - - public abstract FlinkKafkaConsumerBase getConsumer( - List topics, KafkaDeserializationSchema readSchema, Properties props); - - public KafkaSourceBuilder getSourceBuilder( - List topics, DeserializationSchema deserializationSchema, Properties props) { - return getSourceBuilder( - topics, new KafkaDeserializationSchemaWrapper(deserializationSchema), props); - } - - public KafkaSourceBuilder getSourceBuilder( - String topic, KafkaDeserializationSchema readSchema, Properties props) { - return getSourceBuilder(Collections.singletonList(topic), readSchema, props); - } - - public KafkaSourceBuilder getSourceBuilder( - String topic, DeserializationSchema deserializationSchema, Properties props) { - return getSourceBuilder(Collections.singletonList(topic), deserializationSchema, props); - } - - public abstract KafkaSourceBuilder getSourceBuilder( - List topics, KafkaDeserializationSchema readSchema, Properties props); - public abstract Collection> getAllRecordsFromTopic( Properties properties, String topic); - public abstract StreamSink getProducerSink( - String topic, - SerializationSchema serSchema, - Properties props, - FlinkKafkaPartitioner partitioner); - - @Deprecated - public abstract DataStreamSink produceIntoKafka( - DataStream stream, - String topic, - KeyedSerializationSchema serSchema, - Properties props, - FlinkKafkaPartitioner partitioner); - - public abstract DataStreamSink produceIntoKafka( - DataStream stream, - String topic, - SerializationSchema serSchema, - Properties props, - FlinkKafkaPartitioner partitioner); - - public DataStreamSink produceIntoKafka( - DataStream stream, - String topic, - KafkaSerializationSchema serSchema, - Properties props) { - throw new RuntimeException( - "KafkaSerializationSchema is only supported on the modern Kafka Connector."); - } - // -- offset handlers /** Simple interface to commit and retrieve offsets. 
*/ diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironmentImpl.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironmentImpl.java index 31ddbbedf..52d3b9dc5 100644 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironmentImpl.java +++ b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironmentImpl.java @@ -17,18 +17,9 @@ package org.apache.flink.streaming.connectors.kafka; -import org.apache.flink.api.common.serialization.SerializationSchema; -import org.apache.flink.connector.kafka.source.KafkaSource; -import org.apache.flink.connector.kafka.source.KafkaSourceBuilder; -import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema; import org.apache.flink.connector.kafka.testutils.DockerImageVersions; import org.apache.flink.connector.kafka.testutils.KafkaUtil; import org.apache.flink.core.testutils.CommonTestUtils; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.datastream.DataStreamSink; -import org.apache.flink.streaming.api.operators.StreamSink; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; -import org.apache.flink.streaming.util.serialization.KeyedSerializationSchema; import org.apache.commons.collections.list.UnmodifiableList; import org.apache.kafka.clients.admin.AdminClient; @@ -55,9 +46,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Properties; import java.util.Random; import java.util.Set; @@ -81,16 +70,11 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment { private @Nullable Network network; private String brokerConnectionString = ""; private Properties standardProps; - private FlinkKafkaProducer.Semantic producerSemantic = FlinkKafkaProducer.Semantic.EXACTLY_ONCE; // 6 seconds is default. Seems to be too small for travis. 30 seconds private int zkTimeout = 30000; private Config config; private static final int REQUEST_TIMEOUT_SECONDS = 30; - public void setProducerSemantic(FlinkKafkaProducer.Semantic producerSemantic) { - this.producerSemantic = producerSemantic; - } - @Override public void prepare(Config config) throws Exception { // increase the timeout since in Travis ZK connection takes long time for secure connection. 
@@ -236,21 +220,6 @@ public String getVersion() { return DockerImageVersions.KAFKA; } - @Override - public FlinkKafkaConsumerBase getConsumer( - List topics, KafkaDeserializationSchema readSchema, Properties props) { - return new FlinkKafkaConsumer(topics, readSchema, props); - } - - @Override - public KafkaSourceBuilder getSourceBuilder( - List topics, KafkaDeserializationSchema schema, Properties props) { - return KafkaSource.builder() - .setTopics(topics) - .setDeserializer(KafkaRecordDeserializationSchema.of(schema)) - .setProperties(props); - } - @Override @SuppressWarnings("unchecked") public Collection> getAllRecordsFromTopic( @@ -258,65 +227,6 @@ public Collection> getAllRecordsFromTopic( return UnmodifiableList.decorate(KafkaUtil.drainAllRecordsFromTopic(topic, properties)); } - @Override - public StreamSink getProducerSink( - String topic, - SerializationSchema serSchema, - Properties props, - FlinkKafkaPartitioner partitioner) { - return new StreamSink<>( - new FlinkKafkaProducer<>( - topic, - serSchema, - props, - partitioner, - producerSemantic, - FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE)); - } - - @Override - public DataStreamSink produceIntoKafka( - DataStream stream, - String topic, - KeyedSerializationSchema serSchema, - Properties props, - FlinkKafkaPartitioner partitioner) { - return stream.addSink( - new FlinkKafkaProducer( - topic, - serSchema, - props, - Optional.ofNullable(partitioner), - producerSemantic, - FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE)); - } - - @Override - public DataStreamSink produceIntoKafka( - DataStream stream, - String topic, - SerializationSchema serSchema, - Properties props, - FlinkKafkaPartitioner partitioner) { - return stream.addSink( - new FlinkKafkaProducer( - topic, - serSchema, - props, - partitioner, - producerSemantic, - FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE)); - } - - @Override - public DataStreamSink produceIntoKafka( - DataStream stream, - String topic, - KafkaSerializationSchema serSchema, - Properties props) { - return stream.addSink(new FlinkKafkaProducer(topic, serSchema, props, producerSemantic)); - } - @Override public KafkaOffsetHandler createOffsetHandler() { return new KafkaOffsetHandlerImpl(); diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/NextTransactionalIdHintSerializerTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/NextTransactionalIdHintSerializerTest.java deleted file mode 100644 index 7632afef0..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/NextTransactionalIdHintSerializerTest.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
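For reference: the getConsumer/getSourceBuilder/getProducerSink/produceIntoKafka helpers removed above were thin wrappers around the deprecated FlinkKafkaConsumer and FlinkKafkaProducer. Code that still needs to read from and write to Kafka can wire up the current KafkaSource and KafkaSink builders directly, which is what the removed getSourceBuilder implementation already did internally for the source side. The following is a minimal sketch only, not part of this diff; the bootstrap server, topic, and group id values are placeholders.

// Illustrative sketch, not part of this change. Assumes a reachable broker at the
// placeholder address and an existing topic named "test-topic".
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KafkaSourceSinkSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // In place of the removed getConsumer()/getSourceBuilder() helpers:
        // build a KafkaSource directly.
        KafkaSource<String> source =
                KafkaSource.<String>builder()
                        .setBootstrapServers("localhost:9092") // placeholder
                        .setTopics("test-topic")               // placeholder
                        .setGroupId("test-group")              // placeholder
                        .setStartingOffsets(OffsetsInitializer.earliest())
                        .setValueOnlyDeserializer(new SimpleStringSchema())
                        .build();

        DataStream<String> stream =
                env.fromSource(source, WatermarkStrategy.noWatermarks(), "kafka-source");

        // In place of the removed produceIntoKafka()/getProducerSink() helpers:
        // a KafkaSink with an explicit delivery guarantee instead of FlinkKafkaProducer.Semantic.
        KafkaSink<String> sink =
                KafkaSink.<String>builder()
                        .setBootstrapServers("localhost:9092") // placeholder
                        .setRecordSerializer(
                                KafkaRecordSerializationSchema.builder()
                                        .setTopic("test-topic") // placeholder
                                        .setValueSerializationSchema(new SimpleStringSchema())
                                        .build())
                        .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
                        .build();

        stream.sinkTo(sink);
        env.execute("kafka-source-sink-sketch");
    }
}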
- */ - -package org.apache.flink.streaming.connectors.kafka; - -import org.apache.flink.api.common.typeutils.SerializerTestBase; -import org.apache.flink.api.common.typeutils.TypeSerializer; - -/** - * A test for the {@link TypeSerializer TypeSerializers} used for {@link - * FlinkKafkaProducer.NextTransactionalIdHint}. - */ -class NextTransactionalIdHintSerializerTest - extends SerializerTestBase { - - @Override - protected TypeSerializer createSerializer() { - return new FlinkKafkaProducer.NextTransactionalIdHintSerializer(); - } - - @Override - protected int getLength() { - return Long.BYTES + Integer.BYTES; - } - - @Override - protected Class getTypeClass() { - return (Class) FlinkKafkaProducer.NextTransactionalIdHint.class; - } - - @Override - protected FlinkKafkaProducer.NextTransactionalIdHint[] getTestData() { - return new FlinkKafkaProducer.NextTransactionalIdHint[] { - new FlinkKafkaProducer.NextTransactionalIdHint(1, 0L), - new FlinkKafkaProducer.NextTransactionalIdHint(1, 1L), - new FlinkKafkaProducer.NextTransactionalIdHint(1, -1L), - new FlinkKafkaProducer.NextTransactionalIdHint(2, 0L), - new FlinkKafkaProducer.NextTransactionalIdHint(2, 1L), - new FlinkKafkaProducer.NextTransactionalIdHint(2, -1L), - }; - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractFetcherTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractFetcherTest.java deleted file mode 100644 index 92fc08eb7..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractFetcherTest.java +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.api.common.eventtime.WatermarkStrategy; -import org.apache.flink.core.testutils.CheckedThread; -import org.apache.flink.core.testutils.OneShotLatch; -import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; -import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext; -import org.apache.flink.streaming.connectors.kafka.testutils.TestSourceContext; -import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; -import org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService; -import org.apache.flink.util.SerializedValue; - -import org.junit.Test; - -import javax.annotation.Nonnull; - -import java.util.ArrayDeque; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Optional; -import java.util.Queue; - -import static org.assertj.core.api.Assertions.assertThat; - -/** Tests for the {@link AbstractFetcher}. 
*/ -@SuppressWarnings("serial") -public class AbstractFetcherTest { - - @Test - public void testIgnorePartitionStateSentinelInSnapshot() throws Exception { - final String testTopic = "test topic name"; - Map originalPartitions = new HashMap<>(); - originalPartitions.put( - new KafkaTopicPartition(testTopic, 1), - KafkaTopicPartitionStateSentinel.LATEST_OFFSET); - originalPartitions.put( - new KafkaTopicPartition(testTopic, 2), - KafkaTopicPartitionStateSentinel.GROUP_OFFSET); - originalPartitions.put( - new KafkaTopicPartition(testTopic, 3), - KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET); - - TestSourceContext sourceContext = new TestSourceContext<>(); - - TestFetcher fetcher = - new TestFetcher<>( - sourceContext, - originalPartitions, - null, /* watermark strategy */ - new TestProcessingTimeService(), - 0); - - synchronized (sourceContext.getCheckpointLock()) { - HashMap currentState = fetcher.snapshotCurrentState(); - fetcher.commitInternalOffsetsToKafka( - currentState, - new KafkaCommitCallback() { - @Override - public void onSuccess() {} - - @Override - public void onException(Throwable cause) { - throw new RuntimeException("Callback failed", cause); - } - }); - - assertThat(fetcher.getLastCommittedOffsets()).isPresent(); - assertThat(fetcher.getLastCommittedOffsets().get()).isEmpty(); - } - } - - // ------------------------------------------------------------------------ - // Record emitting tests - // ------------------------------------------------------------------------ - - @Test - public void testSkipCorruptedRecord() throws Exception { - final String testTopic = "test topic name"; - Map originalPartitions = new HashMap<>(); - originalPartitions.put( - new KafkaTopicPartition(testTopic, 1), - KafkaTopicPartitionStateSentinel.LATEST_OFFSET); - - TestSourceContext sourceContext = new TestSourceContext<>(); - - TestFetcher fetcher = - new TestFetcher<>( - sourceContext, - originalPartitions, - null, /* watermark strategy */ - new TestProcessingTimeService(), - 0); - - final KafkaTopicPartitionState partitionStateHolder = - fetcher.subscribedPartitionStates().get(0); - - emitRecord(fetcher, 1L, partitionStateHolder, 1L); - emitRecord(fetcher, 2L, partitionStateHolder, 2L); - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(2L); - assertThat(partitionStateHolder.getOffset()).isEqualTo(2L); - - // emit no records - fetcher.emitRecordsWithTimestamps(emptyQueue(), partitionStateHolder, 3L, Long.MIN_VALUE); - assertThat(sourceContext.getLatestElement().getValue().longValue()) - .isEqualTo(2L); // the null record should be skipped - assertThat(partitionStateHolder.getOffset()) - .isEqualTo(3L); // the offset in state still should have advanced - } - - @Test - public void testConcurrentPartitionsDiscoveryAndLoopFetching() throws Exception { - // test data - final KafkaTopicPartition testPartition = new KafkaTopicPartition("test", 42); - - // ----- create the test fetcher ----- - - SourceContext sourceContext = new TestSourceContext<>(); - Map partitionsWithInitialOffsets = - Collections.singletonMap( - testPartition, KafkaTopicPartitionStateSentinel.GROUP_OFFSET); - - final OneShotLatch fetchLoopWaitLatch = new OneShotLatch(); - final OneShotLatch stateIterationBlockLatch = new OneShotLatch(); - - final TestFetcher fetcher = - new TestFetcher<>( - sourceContext, - partitionsWithInitialOffsets, - null, /* watermark strategy */ - new TestProcessingTimeService(), - 10, - fetchLoopWaitLatch, - stateIterationBlockLatch); - - // ----- run the fetcher ----- - - 
final CheckedThread checkedThread = - new CheckedThread() { - @Override - public void go() throws Exception { - fetcher.runFetchLoop(); - } - }; - checkedThread.start(); - - // wait until state iteration begins before adding discovered partitions - fetchLoopWaitLatch.await(); - fetcher.addDiscoveredPartitions(Collections.singletonList(testPartition)); - - stateIterationBlockLatch.trigger(); - checkedThread.sync(); - } - - // ------------------------------------------------------------------------ - // Test mocks - // ------------------------------------------------------------------------ - - private static final class TestFetcher extends AbstractFetcher { - Map lastCommittedOffsets = null; - - private final OneShotLatch fetchLoopWaitLatch; - private final OneShotLatch stateIterationBlockLatch; - - TestFetcher( - SourceContext sourceContext, - Map assignedPartitionsWithStartOffsets, - SerializedValue> watermarkStrategy, - ProcessingTimeService processingTimeProvider, - long autoWatermarkInterval) - throws Exception { - - this( - sourceContext, - assignedPartitionsWithStartOffsets, - watermarkStrategy, - processingTimeProvider, - autoWatermarkInterval, - null, - null); - } - - TestFetcher( - SourceContext sourceContext, - Map assignedPartitionsWithStartOffsets, - SerializedValue> watermarkStrategy, - ProcessingTimeService processingTimeProvider, - long autoWatermarkInterval, - OneShotLatch fetchLoopWaitLatch, - OneShotLatch stateIterationBlockLatch) - throws Exception { - - super( - sourceContext, - assignedPartitionsWithStartOffsets, - watermarkStrategy, - processingTimeProvider, - autoWatermarkInterval, - TestFetcher.class.getClassLoader(), - new UnregisteredMetricsGroup(), - false); - - this.fetchLoopWaitLatch = fetchLoopWaitLatch; - this.stateIterationBlockLatch = stateIterationBlockLatch; - } - - /** - * Emulation of partition's iteration which is required for {@link - * AbstractFetcherTest#testConcurrentPartitionsDiscoveryAndLoopFetching}. 
- */ - @Override - public void runFetchLoop() throws Exception { - if (fetchLoopWaitLatch != null) { - for (KafkaTopicPartitionState ignored : subscribedPartitionStates()) { - fetchLoopWaitLatch.trigger(); - stateIterationBlockLatch.await(); - } - } else { - throw new UnsupportedOperationException(); - } - } - - @Override - public void cancel() { - throw new UnsupportedOperationException(); - } - - @Override - public Object createKafkaPartitionHandle(KafkaTopicPartition partition) { - return new Object(); - } - - @Override - protected void doCommitInternalOffsetsToKafka( - Map offsets, @Nonnull KafkaCommitCallback callback) { - lastCommittedOffsets = offsets; - callback.onSuccess(); - } - - public Optional> getLastCommittedOffsets() { - return Optional.ofNullable(lastCommittedOffsets); - } - } - - // ------------------------------------------------------------------------ - - private static void emitRecord( - AbstractFetcher fetcher, - T record, - KafkaTopicPartitionState partitionState, - long offset) { - ArrayDeque recordQueue = new ArrayDeque<>(); - recordQueue.add(record); - - fetcher.emitRecordsWithTimestamps(recordQueue, partitionState, offset, Long.MIN_VALUE); - } - - private static Queue emptyQueue() { - return new ArrayDeque<>(); - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractFetcherWatermarksTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractFetcherWatermarksTest.java deleted file mode 100644 index 6cd3b6453..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractFetcherWatermarksTest.java +++ /dev/null @@ -1,499 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.api.common.eventtime.WatermarkGenerator; -import org.apache.flink.api.common.eventtime.WatermarkOutput; -import org.apache.flink.api.common.eventtime.WatermarkStrategy; -import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; -import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; -import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; -import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext; -import org.apache.flink.streaming.api.watermark.Watermark; -import org.apache.flink.streaming.connectors.kafka.testutils.TestSourceContext; -import org.apache.flink.streaming.runtime.operators.util.AssignerWithPeriodicWatermarksAdapter; -import org.apache.flink.streaming.runtime.operators.util.AssignerWithPunctuatedWatermarksAdapter; -import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; -import org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService; -import org.apache.flink.util.SerializedValue; - -import org.junit.Test; -import org.junit.experimental.runners.Enclosed; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -import javax.annotation.Nonnull; -import javax.annotation.Nullable; - -import java.util.ArrayDeque; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Queue; - -import static org.assertj.core.api.Assertions.assertThat; - -/** Tests for the watermarking behaviour of {@link AbstractFetcher}. */ -@SuppressWarnings("serial") -@RunWith(Enclosed.class) -public class AbstractFetcherWatermarksTest { - - /** Tests with watermark generators that have a periodic nature. 
*/ - @RunWith(Parameterized.class) - public static class PeriodicWatermarksSuite { - - @Parameterized.Parameters - public static Collection> getParams() { - return Arrays.asList( - new AssignerWithPeriodicWatermarksAdapter.Strategy<>( - new PeriodicTestExtractor()), - WatermarkStrategy.forGenerator((ctx) -> new PeriodicTestWatermarkGenerator()) - .withTimestampAssigner((event, previousTimestamp) -> event)); - } - - @Parameterized.Parameter public WatermarkStrategy testWmStrategy; - - @Test - public void testPeriodicWatermarks() throws Exception { - final String testTopic = "test topic name"; - Map originalPartitions = new HashMap<>(); - originalPartitions.put( - new KafkaTopicPartition(testTopic, 7), - KafkaTopicPartitionStateSentinel.LATEST_OFFSET); - originalPartitions.put( - new KafkaTopicPartition(testTopic, 13), - KafkaTopicPartitionStateSentinel.LATEST_OFFSET); - originalPartitions.put( - new KafkaTopicPartition(testTopic, 21), - KafkaTopicPartitionStateSentinel.LATEST_OFFSET); - - TestSourceContext sourceContext = new TestSourceContext<>(); - - TestProcessingTimeService processingTimeService = new TestProcessingTimeService(); - - TestFetcher fetcher = - new TestFetcher<>( - sourceContext, - originalPartitions, - new SerializedValue<>(testWmStrategy), - processingTimeService, - 10); - - final KafkaTopicPartitionState part1 = - fetcher.subscribedPartitionStates().get(0); - final KafkaTopicPartitionState part2 = - fetcher.subscribedPartitionStates().get(1); - final KafkaTopicPartitionState part3 = - fetcher.subscribedPartitionStates().get(2); - - // elements generate a watermark if the timestamp is a multiple of three - - // elements for partition 1 - emitRecord(fetcher, 1L, part1, 1L); - emitRecord(fetcher, 1L, part1, 1L); - emitRecord(fetcher, 2L, part1, 2L); - emitRecord(fetcher, 3L, part1, 3L); - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(3L); - assertThat(sourceContext.getLatestElement().getTimestamp()).isEqualTo(3L); - - // elements for partition 2 - emitRecord(fetcher, 12L, part2, 1L); - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(12L); - assertThat(sourceContext.getLatestElement().getTimestamp()).isEqualTo(12L); - - // elements for partition 3 - emitRecord(fetcher, 101L, part3, 1L); - emitRecord(fetcher, 102L, part3, 2L); - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(102L); - assertThat(sourceContext.getLatestElement().getTimestamp()).isEqualTo(102L); - - processingTimeService.setCurrentTime(10); - - // now, we should have a watermark (this blocks until the periodic thread emitted the - // watermark) - assertThat(sourceContext.getLatestWatermark().getTimestamp()).isEqualTo(3L); - - // advance partition 3 - emitRecord(fetcher, 1003L, part3, 3L); - emitRecord(fetcher, 1004L, part3, 4L); - emitRecord(fetcher, 1005L, part3, 5L); - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(1005L); - assertThat(sourceContext.getLatestElement().getTimestamp()).isEqualTo(1005L); - - // advance partition 1 beyond partition 2 - this bumps the watermark - emitRecord(fetcher, 30L, part1, 4L); - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(30L); - assertThat(sourceContext.getLatestElement().getTimestamp()).isEqualTo(30L); - - processingTimeService.setCurrentTime(20); - - // this blocks until the periodic thread emitted the watermark - assertThat(sourceContext.getLatestWatermark().getTimestamp()).isEqualTo(12L); - - // advance partition 2 
again - this bumps the watermark - emitRecord(fetcher, 13L, part2, 2L); - emitRecord(fetcher, 14L, part2, 3L); - emitRecord(fetcher, 15L, part2, 3L); - - processingTimeService.setCurrentTime(30); - // this blocks until the periodic thread emitted the watermark - long watermarkTs = sourceContext.getLatestWatermark().getTimestamp(); - assertThat(watermarkTs >= 13L && watermarkTs <= 15L).isTrue(); - } - - @Test - public void testSkipCorruptedRecordWithPeriodicWatermarks() throws Exception { - final String testTopic = "test topic name"; - Map originalPartitions = new HashMap<>(); - originalPartitions.put( - new KafkaTopicPartition(testTopic, 1), - KafkaTopicPartitionStateSentinel.LATEST_OFFSET); - - TestSourceContext sourceContext = new TestSourceContext<>(); - - TestProcessingTimeService processingTimeProvider = new TestProcessingTimeService(); - - TestFetcher fetcher = - new TestFetcher<>( - sourceContext, - originalPartitions, - new SerializedValue<>(testWmStrategy), - processingTimeProvider, - 10); - - final KafkaTopicPartitionState partitionStateHolder = - fetcher.subscribedPartitionStates().get(0); - - // elements generate a watermark if the timestamp is a multiple of three - emitRecord(fetcher, 1L, partitionStateHolder, 1L); - emitRecord(fetcher, 2L, partitionStateHolder, 2L); - emitRecord(fetcher, 3L, partitionStateHolder, 3L); - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(3L); - assertThat(sourceContext.getLatestElement().getTimestamp()).isEqualTo(3L); - assertThat(partitionStateHolder.getOffset()).isEqualTo(3L); - - // advance timer for watermark emitting - processingTimeProvider.setCurrentTime(10L); - assertThat(sourceContext.hasWatermark()).isTrue(); - assertThat(sourceContext.getLatestWatermark().getTimestamp()).isEqualTo(3L); - - // emit no records - fetcher.emitRecordsWithTimestamps( - emptyQueue(), partitionStateHolder, 4L, Long.MIN_VALUE); - - // no elements should have been collected - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(3L); - assertThat(sourceContext.getLatestElement().getTimestamp()).isEqualTo(3L); - // the offset in state still should have advanced - assertThat(partitionStateHolder.getOffset()).isEqualTo(4L); - - // no watermarks should be collected - processingTimeProvider.setCurrentTime(20L); - assertThat(sourceContext.hasWatermark()).isFalse(); - } - - @Test - public void testPeriodicWatermarksWithNoSubscribedPartitionsShouldYieldNoWatermarks() - throws Exception { - final String testTopic = "test topic name"; - Map originalPartitions = new HashMap<>(); - - TestSourceContext sourceContext = new TestSourceContext<>(); - - TestProcessingTimeService processingTimeProvider = new TestProcessingTimeService(); - - TestFetcher fetcher = - new TestFetcher<>( - sourceContext, - originalPartitions, - new SerializedValue<>(testWmStrategy), - processingTimeProvider, - 10); - - processingTimeProvider.setCurrentTime(10); - // no partitions; when the periodic watermark emitter fires, no watermark should be - // emitted - assertThat(sourceContext.hasWatermark()).isFalse(); - - // counter-test that when the fetcher does actually have partitions, - // when the periodic watermark emitter fires again, a watermark really is emitted - fetcher.addDiscoveredPartitions( - Collections.singletonList(new KafkaTopicPartition(testTopic, 0))); - emitRecord(fetcher, 100L, fetcher.subscribedPartitionStates().get(0), 3L); - processingTimeProvider.setCurrentTime(20); - 
assertThat(sourceContext.getLatestWatermark().getTimestamp()).isEqualTo(100); - } - } - - /** Tests with watermark generators that have a punctuated nature. */ - public static class PunctuatedWatermarksSuite { - - @Test - public void testSkipCorruptedRecordWithPunctuatedWatermarks() throws Exception { - final String testTopic = "test topic name"; - Map originalPartitions = new HashMap<>(); - originalPartitions.put( - new KafkaTopicPartition(testTopic, 1), - KafkaTopicPartitionStateSentinel.LATEST_OFFSET); - - TestSourceContext sourceContext = new TestSourceContext<>(); - - TestProcessingTimeService processingTimeProvider = new TestProcessingTimeService(); - - AssignerWithPunctuatedWatermarksAdapter.Strategy testWmStrategy = - new AssignerWithPunctuatedWatermarksAdapter.Strategy<>( - new PunctuatedTestExtractor()); - - TestFetcher fetcher = - new TestFetcher<>( - sourceContext, - originalPartitions, - new SerializedValue<>(testWmStrategy), - processingTimeProvider, - 0); - - final KafkaTopicPartitionState partitionStateHolder = - fetcher.subscribedPartitionStates().get(0); - - // elements generate a watermark if the timestamp is a multiple of three - emitRecord(fetcher, 1L, partitionStateHolder, 1L); - emitRecord(fetcher, 2L, partitionStateHolder, 2L); - emitRecord(fetcher, 3L, partitionStateHolder, 3L); - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(3L); - assertThat(sourceContext.getLatestElement().getTimestamp()).isEqualTo(3L); - assertThat(sourceContext.hasWatermark()).isTrue(); - assertThat(sourceContext.getLatestWatermark().getTimestamp()).isEqualTo(3L); - assertThat(partitionStateHolder.getOffset()).isEqualTo(3L); - - // emit no records - fetcher.emitRecordsWithTimestamps(emptyQueue(), partitionStateHolder, 4L, -1L); - - // no elements or watermarks should have been collected - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(3L); - assertThat(sourceContext.getLatestElement().getTimestamp()).isEqualTo(3L); - assertThat(sourceContext.hasWatermark()).isFalse(); - // the offset in state still should have advanced - assertThat(partitionStateHolder.getOffset()).isEqualTo(4L); - } - - @Test - public void testPunctuatedWatermarks() throws Exception { - final String testTopic = "test topic name"; - Map originalPartitions = new HashMap<>(); - originalPartitions.put( - new KafkaTopicPartition(testTopic, 7), - KafkaTopicPartitionStateSentinel.LATEST_OFFSET); - originalPartitions.put( - new KafkaTopicPartition(testTopic, 13), - KafkaTopicPartitionStateSentinel.LATEST_OFFSET); - originalPartitions.put( - new KafkaTopicPartition(testTopic, 21), - KafkaTopicPartitionStateSentinel.LATEST_OFFSET); - - TestSourceContext sourceContext = new TestSourceContext<>(); - - TestProcessingTimeService processingTimeProvider = new TestProcessingTimeService(); - - AssignerWithPunctuatedWatermarksAdapter.Strategy testWmStrategy = - new AssignerWithPunctuatedWatermarksAdapter.Strategy<>( - new PunctuatedTestExtractor()); - - TestFetcher fetcher = - new TestFetcher<>( - sourceContext, - originalPartitions, - new SerializedValue<>(testWmStrategy), - processingTimeProvider, - 0); - - final KafkaTopicPartitionState part1 = - fetcher.subscribedPartitionStates().get(0); - final KafkaTopicPartitionState part2 = - fetcher.subscribedPartitionStates().get(1); - final KafkaTopicPartitionState part3 = - fetcher.subscribedPartitionStates().get(2); - - // elements generate a watermark if the timestamp is a multiple of three - - // elements for partition 1 - 
emitRecords(fetcher, Arrays.asList(1L, 2L), part1, 1L); - emitRecord(fetcher, 2L, part1, 2L); - emitRecords(fetcher, Arrays.asList(2L, 3L), part1, 3L); - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(3L); - assertThat(sourceContext.getLatestElement().getTimestamp()).isEqualTo(3L); - assertThat(sourceContext.hasWatermark()).isFalse(); - - // elements for partition 2 - emitRecord(fetcher, 12L, part2, 1L); - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(12L); - assertThat(sourceContext.getLatestElement().getTimestamp()).isEqualTo(12L); - assertThat(sourceContext.hasWatermark()).isFalse(); - - // elements for partition 3 - emitRecord(fetcher, 101L, part3, 1L); - emitRecord(fetcher, 102L, part3, 2L); - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(102L); - assertThat(sourceContext.getLatestElement().getTimestamp()).isEqualTo(102L); - - // now, we should have a watermark - assertThat(sourceContext.hasWatermark()).isTrue(); - assertThat(sourceContext.getLatestWatermark().getTimestamp()).isEqualTo(3L); - - // advance partition 3 - emitRecord(fetcher, 1003L, part3, 3L); - emitRecord(fetcher, 1004L, part3, 4L); - emitRecord(fetcher, 1005L, part3, 5L); - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(1005L); - assertThat(sourceContext.getLatestElement().getTimestamp()).isEqualTo(1005L); - - // advance partition 1 beyond partition 2 - this bumps the watermark - emitRecord(fetcher, 30L, part1, 4L); - assertThat(sourceContext.getLatestElement().getValue().longValue()).isEqualTo(30L); - assertThat(sourceContext.getLatestElement().getTimestamp()).isEqualTo(30L); - assertThat(sourceContext.hasWatermark()).isTrue(); - assertThat(sourceContext.getLatestWatermark().getTimestamp()).isEqualTo(12L); - - // advance partition 2 again - this bumps the watermark - emitRecord(fetcher, 13L, part2, 2L); - assertThat(sourceContext.hasWatermark()).isFalse(); - emitRecord(fetcher, 14L, part2, 3L); - assertThat(sourceContext.hasWatermark()).isFalse(); - emitRecord(fetcher, 15L, part2, 3L); - assertThat(sourceContext.hasWatermark()).isTrue(); - assertThat(sourceContext.getLatestWatermark().getTimestamp()).isEqualTo(15L); - } - } - - private static final class TestFetcher extends AbstractFetcher { - TestFetcher( - SourceContext sourceContext, - Map assignedPartitionsWithStartOffsets, - SerializedValue> watermarkStrategy, - ProcessingTimeService processingTimeProvider, - long autoWatermarkInterval) - throws Exception { - super( - sourceContext, - assignedPartitionsWithStartOffsets, - watermarkStrategy, - processingTimeProvider, - autoWatermarkInterval, - TestFetcher.class.getClassLoader(), - new UnregisteredMetricsGroup(), - false); - } - - public void runFetchLoop() { - throw new UnsupportedOperationException(); - } - - @Override - public void cancel() { - throw new UnsupportedOperationException(); - } - - @Override - protected void doCommitInternalOffsetsToKafka( - Map offsets, - @Nonnull KafkaCommitCallback commitCallback) { - throw new UnsupportedOperationException(); - } - - @Override - protected Object createKafkaPartitionHandle(KafkaTopicPartition partition) { - return new Object(); - } - } - - private static void emitRecord( - AbstractFetcher fetcher, - T record, - KafkaTopicPartitionState partitionState, - long offset) { - ArrayDeque recordQueue = new ArrayDeque<>(); - recordQueue.add(record); - - fetcher.emitRecordsWithTimestamps(recordQueue, partitionState, offset, Long.MIN_VALUE); - } - - 
private static void emitRecords( - AbstractFetcher fetcher, - List records, - KafkaTopicPartitionState partitionState, - long offset) { - ArrayDeque recordQueue = new ArrayDeque<>(records); - - fetcher.emitRecordsWithTimestamps(recordQueue, partitionState, offset, Long.MIN_VALUE); - } - - private static Queue emptyQueue() { - return new ArrayDeque<>(); - } - - @SuppressWarnings("deprecation") - private static class PeriodicTestExtractor implements AssignerWithPeriodicWatermarks { - - private volatile long maxTimestamp = Long.MIN_VALUE; - - @Override - public long extractTimestamp(Long element, long previousElementTimestamp) { - maxTimestamp = Math.max(maxTimestamp, element); - return element; - } - - @Nullable - @Override - public Watermark getCurrentWatermark() { - return new Watermark(maxTimestamp); - } - } - - @SuppressWarnings("deprecation") - private static class PunctuatedTestExtractor implements AssignerWithPunctuatedWatermarks { - - @Override - public long extractTimestamp(Long element, long previousElementTimestamp) { - return element; - } - - @Nullable - @Override - public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) { - return extractedTimestamp % 3 == 0 ? new Watermark(extractedTimestamp) : null; - } - } - - private static class PeriodicTestWatermarkGenerator implements WatermarkGenerator { - - private volatile long maxTimestamp = Long.MIN_VALUE; - - @Override - public void onEvent(Long event, long eventTimestamp, WatermarkOutput output) { - maxTimestamp = Math.max(maxTimestamp, event); - } - - @Override - public void onPeriodicEmit(WatermarkOutput output) { - output.emitWatermark(new org.apache.flink.api.common.eventtime.Watermark(maxTimestamp)); - } - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractPartitionDiscovererTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractPartitionDiscovererTest.java deleted file mode 100644 index b47534596..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/AbstractPartitionDiscovererTest.java +++ /dev/null @@ -1,561 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
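The watermark tests deleted above exercised the deprecated AssignerWithPeriodicWatermarks and AssignerWithPunctuatedWatermarks adapters inside the legacy AbstractFetcher. The same periodic behaviour is expressed in the current API as a WatermarkGenerator wrapped in a WatermarkStrategy and applied on the stream (or passed to env.fromSource when reading from a KafkaSource). The sketch below is illustrative only, not part of this diff; it mirrors the deleted PeriodicTestWatermarkGenerator and uses a fixed in-memory stream so it runs without a broker.

// Illustrative sketch, not part of this change.
import org.apache.flink.api.common.eventtime.Watermark;
import org.apache.flink.api.common.eventtime.WatermarkGenerator;
import org.apache.flink.api.common.eventtime.WatermarkOutput;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class PeriodicWatermarkSketch {

    /** Same idea as the deleted PeriodicTestWatermarkGenerator: track the max seen timestamp. */
    static class MaxTimestampGenerator implements WatermarkGenerator<Long> {
        private long maxTimestamp = Long.MIN_VALUE;

        @Override
        public void onEvent(Long event, long eventTimestamp, WatermarkOutput output) {
            maxTimestamp = Math.max(maxTimestamp, event);
        }

        @Override
        public void onPeriodicEmit(WatermarkOutput output) {
            // Emitted on the auto-watermark interval, like the old periodic assigner.
            output.emitWatermark(new Watermark(maxTimestamp));
        }
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        WatermarkStrategy<Long> strategy =
                WatermarkStrategy.<Long>forGenerator(ctx -> new MaxTimestampGenerator())
                        .withTimestampAssigner((event, previousTimestamp) -> event);

        DataStream<Long> stream =
                env.fromElements(1L, 2L, 3L).assignTimestampsAndWatermarks(strategy);

        stream.print();
        env.execute("periodic-watermark-sketch");
    }
}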
- */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.apache.flink.streaming.connectors.kafka.testutils.TestPartitionDiscoverer; - -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.regex.Pattern; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.fail; - -/** - * Tests that the partition assignment in the partition discoverer is deterministic and stable, with - * both fixed and growing partitions. - */ -@RunWith(Parameterized.class) -public class AbstractPartitionDiscovererTest { - - private static final String TEST_TOPIC = "test-topic"; - private static final String TEST_TOPIC_PATTERN = "^" + TEST_TOPIC + "[0-9]*$"; - - private final KafkaTopicsDescriptor topicsDescriptor; - - public AbstractPartitionDiscovererTest(KafkaTopicsDescriptor topicsDescriptor) { - this.topicsDescriptor = topicsDescriptor; - } - - @Parameterized.Parameters(name = "KafkaTopicsDescriptor = {0}") - @SuppressWarnings("unchecked") - public static Collection timeCharacteristic() { - return Arrays.asList( - new KafkaTopicsDescriptor[] { - new KafkaTopicsDescriptor(Collections.singletonList(TEST_TOPIC), null) - }, - new KafkaTopicsDescriptor[] { - new KafkaTopicsDescriptor(null, Pattern.compile(TEST_TOPIC_PATTERN)) - }); - } - - @Test - public void testPartitionsEqualConsumersFixedPartitions() throws Exception { - List mockGetAllPartitionsForTopicsReturn = - Arrays.asList( - new KafkaTopicPartition(TEST_TOPIC, 0), - new KafkaTopicPartition(TEST_TOPIC, 1), - new KafkaTopicPartition(TEST_TOPIC, 2), - new KafkaTopicPartition(TEST_TOPIC, 3)); - - int numSubtasks = mockGetAllPartitionsForTopicsReturn.size(); - - // get the start index; the assertions below will fail if the assignment logic does not meet - // correct contracts - int numConsumers = - KafkaTopicPartitionAssigner.assign( - mockGetAllPartitionsForTopicsReturn.get(0), numSubtasks); - - for (int subtaskIndex = 0; - subtaskIndex < mockGetAllPartitionsForTopicsReturn.size(); - subtaskIndex++) { - TestPartitionDiscoverer partitionDiscoverer = - new TestPartitionDiscoverer( - topicsDescriptor, - subtaskIndex, - mockGetAllPartitionsForTopicsReturn.size(), - TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn( - Collections.singletonList(TEST_TOPIC)), - TestPartitionDiscoverer - .createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn( - mockGetAllPartitionsForTopicsReturn)); - partitionDiscoverer.open(); - - List initialDiscovery = partitionDiscoverer.discoverPartitions(); - assertThat(initialDiscovery).hasSize(1); - assertThat( - contains( - mockGetAllPartitionsForTopicsReturn, - initialDiscovery.get(0).getPartition())) - .isTrue(); - assertThat(subtaskIndex) - .isEqualTo( - getExpectedSubtaskIndex( - initialDiscovery.get(0), numConsumers, numSubtasks)); - - // subsequent discoveries should not find anything - List secondDiscovery = partitionDiscoverer.discoverPartitions(); - List thirdDiscovery = partitionDiscoverer.discoverPartitions(); - assertThat(secondDiscovery).isEmpty(); - assertThat(thirdDiscovery).isEmpty(); - } - } - - @Test - public void testMultiplePartitionsPerConsumersFixedPartitions() { - try { - final int[] partitionIDs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - - final List 
mockGetAllPartitionsForTopicsReturn = new ArrayList<>(); - final Set allPartitions = new HashSet<>(); - - for (int p : partitionIDs) { - KafkaTopicPartition part = new KafkaTopicPartition(TEST_TOPIC, p); - mockGetAllPartitionsForTopicsReturn.add(part); - allPartitions.add(part); - } - - final int numConsumers = 3; - final int minPartitionsPerConsumer = - mockGetAllPartitionsForTopicsReturn.size() / numConsumers; - final int maxPartitionsPerConsumer = - mockGetAllPartitionsForTopicsReturn.size() / numConsumers + 1; - - // get the start index; the assertions below will fail if the assignment logic does not - // meet correct contracts - int startIndex = - KafkaTopicPartitionAssigner.assign( - mockGetAllPartitionsForTopicsReturn.get(0), numConsumers); - - for (int subtaskIndex = 0; subtaskIndex < numConsumers; subtaskIndex++) { - TestPartitionDiscoverer partitionDiscoverer = - new TestPartitionDiscoverer( - topicsDescriptor, - subtaskIndex, - numConsumers, - TestPartitionDiscoverer - .createMockGetAllTopicsSequenceFromFixedReturn( - Collections.singletonList(TEST_TOPIC)), - TestPartitionDiscoverer - .createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn( - mockGetAllPartitionsForTopicsReturn)); - partitionDiscoverer.open(); - - List initialDiscovery = - partitionDiscoverer.discoverPartitions(); - assertThat(initialDiscovery) - .hasSizeGreaterThanOrEqualTo(minPartitionsPerConsumer) - .hasSizeLessThanOrEqualTo(maxPartitionsPerConsumer); - - for (KafkaTopicPartition p : initialDiscovery) { - // check that the element was actually contained - assertThat(allPartitions.remove(p)).isTrue(); - assertThat(subtaskIndex) - .isEqualTo(getExpectedSubtaskIndex(p, startIndex, numConsumers)); - } - - // subsequent discoveries should not find anything - List secondDiscovery = - partitionDiscoverer.discoverPartitions(); - List thirdDiscovery = partitionDiscoverer.discoverPartitions(); - assertThat(secondDiscovery).isEmpty(); - assertThat(thirdDiscovery).isEmpty(); - } - - // all partitions must have been assigned - assertThat(allPartitions).isEmpty(); - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } - } - - @Test - public void testPartitionsFewerThanConsumersFixedPartitions() { - try { - List mockGetAllPartitionsForTopicsReturn = - Arrays.asList( - new KafkaTopicPartition(TEST_TOPIC, 0), - new KafkaTopicPartition(TEST_TOPIC, 1), - new KafkaTopicPartition(TEST_TOPIC, 2), - new KafkaTopicPartition(TEST_TOPIC, 3)); - - final Set allPartitions = new HashSet<>(); - allPartitions.addAll(mockGetAllPartitionsForTopicsReturn); - - final int numConsumers = 2 * mockGetAllPartitionsForTopicsReturn.size() + 3; - - // get the start index; the assertions below will fail if the assignment logic does not - // meet correct contracts - int startIndex = - KafkaTopicPartitionAssigner.assign( - mockGetAllPartitionsForTopicsReturn.get(0), numConsumers); - - for (int subtaskIndex = 0; subtaskIndex < numConsumers; subtaskIndex++) { - TestPartitionDiscoverer partitionDiscoverer = - new TestPartitionDiscoverer( - topicsDescriptor, - subtaskIndex, - numConsumers, - TestPartitionDiscoverer - .createMockGetAllTopicsSequenceFromFixedReturn( - Collections.singletonList(TEST_TOPIC)), - TestPartitionDiscoverer - .createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn( - mockGetAllPartitionsForTopicsReturn)); - partitionDiscoverer.open(); - - List initialDiscovery = - partitionDiscoverer.discoverPartitions(); - assertThat(initialDiscovery).hasSizeLessThanOrEqualTo(1); - - for (KafkaTopicPartition p : 
initialDiscovery) { - // check that the element was actually contained - assertThat(allPartitions.remove(p)).isTrue(); - assertThat(subtaskIndex) - .isEqualTo(getExpectedSubtaskIndex(p, startIndex, numConsumers)); - } - - // subsequent discoveries should not find anything - List secondDiscovery = - partitionDiscoverer.discoverPartitions(); - List thirdDiscovery = partitionDiscoverer.discoverPartitions(); - assertThat(secondDiscovery).isEmpty(); - assertThat(thirdDiscovery).isEmpty(); - } - - // all partitions must have been assigned - assertThat(allPartitions).isEmpty(); - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } - } - - @Test - public void testGrowingPartitions() { - try { - final int[] newPartitionIDs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - List allPartitions = new ArrayList<>(11); - - for (int p : newPartitionIDs) { - KafkaTopicPartition part = new KafkaTopicPartition(TEST_TOPIC, p); - allPartitions.add(part); - } - - // first discovery returns an initial subset of the partitions; second returns all - // partitions - List> mockGetAllPartitionsForTopicsReturnSequence = - Arrays.asList(new ArrayList<>(allPartitions.subList(0, 7)), allPartitions); - - final Set allNewPartitions = new HashSet<>(allPartitions); - final Set allInitialPartitions = - new HashSet<>(mockGetAllPartitionsForTopicsReturnSequence.get(0)); - - final int numConsumers = 3; - final int minInitialPartitionsPerConsumer = - mockGetAllPartitionsForTopicsReturnSequence.get(0).size() / numConsumers; - final int maxInitialPartitionsPerConsumer = - mockGetAllPartitionsForTopicsReturnSequence.get(0).size() / numConsumers + 1; - final int minNewPartitionsPerConsumer = allPartitions.size() / numConsumers; - final int maxNewPartitionsPerConsumer = allPartitions.size() / numConsumers + 1; - - // get the start index; the assertions below will fail if the assignment logic does not - // meet correct contracts - int startIndex = KafkaTopicPartitionAssigner.assign(allPartitions.get(0), numConsumers); - - TestPartitionDiscoverer partitionDiscovererSubtask0 = - new TestPartitionDiscoverer( - topicsDescriptor, - 0, - numConsumers, - TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn( - Collections.singletonList(TEST_TOPIC)), - deepClone(mockGetAllPartitionsForTopicsReturnSequence)); - partitionDiscovererSubtask0.open(); - - TestPartitionDiscoverer partitionDiscovererSubtask1 = - new TestPartitionDiscoverer( - topicsDescriptor, - 1, - numConsumers, - TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn( - Collections.singletonList(TEST_TOPIC)), - deepClone(mockGetAllPartitionsForTopicsReturnSequence)); - partitionDiscovererSubtask1.open(); - - TestPartitionDiscoverer partitionDiscovererSubtask2 = - new TestPartitionDiscoverer( - topicsDescriptor, - 2, - numConsumers, - TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn( - Collections.singletonList(TEST_TOPIC)), - deepClone(mockGetAllPartitionsForTopicsReturnSequence)); - partitionDiscovererSubtask2.open(); - - List initialDiscoverySubtask0 = - partitionDiscovererSubtask0.discoverPartitions(); - List initialDiscoverySubtask1 = - partitionDiscovererSubtask1.discoverPartitions(); - List initialDiscoverySubtask2 = - partitionDiscovererSubtask2.discoverPartitions(); - - assertThat(initialDiscoverySubtask0) - .hasSizeGreaterThanOrEqualTo(minInitialPartitionsPerConsumer) - .hasSizeLessThanOrEqualTo(maxInitialPartitionsPerConsumer); - assertThat(initialDiscoverySubtask1) - 
.hasSizeGreaterThanOrEqualTo(minInitialPartitionsPerConsumer) - .hasSizeLessThanOrEqualTo(maxInitialPartitionsPerConsumer); - assertThat(initialDiscoverySubtask2) - .hasSizeGreaterThanOrEqualTo(minInitialPartitionsPerConsumer) - .hasSizeLessThanOrEqualTo(maxInitialPartitionsPerConsumer); - - for (KafkaTopicPartition p : initialDiscoverySubtask0) { - // check that the element was actually contained - assertThat(allInitialPartitions.remove(p)).isTrue(); - assertThat(0).isEqualTo(getExpectedSubtaskIndex(p, startIndex, numConsumers)); - } - - for (KafkaTopicPartition p : initialDiscoverySubtask1) { - // check that the element was actually contained - assertThat(allInitialPartitions.remove(p)).isTrue(); - assertThat(1).isEqualTo(getExpectedSubtaskIndex(p, startIndex, numConsumers)); - } - - for (KafkaTopicPartition p : initialDiscoverySubtask2) { - // check that the element was actually contained - assertThat(allInitialPartitions.remove(p)).isTrue(); - assertThat(2).isEqualTo(getExpectedSubtaskIndex(p, startIndex, numConsumers)); - } - - // all partitions must have been assigned - assertThat(allInitialPartitions).isEmpty(); - - // now, execute discover again (should find the extra new partitions) - List secondDiscoverySubtask0 = - partitionDiscovererSubtask0.discoverPartitions(); - List secondDiscoverySubtask1 = - partitionDiscovererSubtask1.discoverPartitions(); - List secondDiscoverySubtask2 = - partitionDiscovererSubtask2.discoverPartitions(); - - // new discovered partitions must not have been discovered before - assertThat(Collections.disjoint(secondDiscoverySubtask0, initialDiscoverySubtask0)) - .isTrue(); - assertThat(Collections.disjoint(secondDiscoverySubtask1, initialDiscoverySubtask1)) - .isTrue(); - assertThat(Collections.disjoint(secondDiscoverySubtask2, initialDiscoverySubtask2)) - .isTrue(); - - assertThat(secondDiscoverySubtask0.size() + initialDiscoverySubtask0.size()) - .isGreaterThanOrEqualTo(minNewPartitionsPerConsumer); - assertThat(secondDiscoverySubtask0.size() + initialDiscoverySubtask0.size()) - .isLessThanOrEqualTo(maxNewPartitionsPerConsumer); - assertThat(secondDiscoverySubtask1.size() + initialDiscoverySubtask1.size()) - .isGreaterThanOrEqualTo(minNewPartitionsPerConsumer); - assertThat(secondDiscoverySubtask1.size() + initialDiscoverySubtask1.size()) - .isLessThanOrEqualTo(maxNewPartitionsPerConsumer); - assertThat(secondDiscoverySubtask2.size() + initialDiscoverySubtask2.size()) - .isGreaterThanOrEqualTo(minNewPartitionsPerConsumer); - assertThat(secondDiscoverySubtask2.size() + initialDiscoverySubtask2.size()) - .isLessThanOrEqualTo(maxNewPartitionsPerConsumer); - - // check that the two discoveries combined form all partitions - - for (KafkaTopicPartition p : initialDiscoverySubtask0) { - assertThat(allNewPartitions.remove(p)).isTrue(); - assertThat(0).isEqualTo(getExpectedSubtaskIndex(p, startIndex, numConsumers)); - } - - for (KafkaTopicPartition p : initialDiscoverySubtask1) { - assertThat(allNewPartitions.remove(p)).isTrue(); - assertThat(1).isEqualTo(getExpectedSubtaskIndex(p, startIndex, numConsumers)); - } - - for (KafkaTopicPartition p : initialDiscoverySubtask2) { - assertThat(allNewPartitions.remove(p)).isTrue(); - assertThat(2).isEqualTo(getExpectedSubtaskIndex(p, startIndex, numConsumers)); - } - - for (KafkaTopicPartition p : secondDiscoverySubtask0) { - assertThat(allNewPartitions.remove(p)).isTrue(); - assertThat(0).isEqualTo(getExpectedSubtaskIndex(p, startIndex, numConsumers)); - } - - for (KafkaTopicPartition p : secondDiscoverySubtask1) { - 
assertThat(allNewPartitions.remove(p)).isTrue(); - assertThat(1).isEqualTo(getExpectedSubtaskIndex(p, startIndex, numConsumers)); - } - - for (KafkaTopicPartition p : secondDiscoverySubtask2) { - assertThat(allNewPartitions.remove(p)).isTrue(); - assertThat(2).isEqualTo(getExpectedSubtaskIndex(p, startIndex, numConsumers)); - } - - // all partitions must have been assigned - assertThat(allNewPartitions).isEmpty(); - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } - } - - @Test - public void testDeterministicAssignmentWithDifferentFetchedPartitionOrdering() - throws Exception { - int numSubtasks = 4; - - List mockGetAllPartitionsForTopicsReturn = - Arrays.asList( - new KafkaTopicPartition("test-topic", 0), - new KafkaTopicPartition("test-topic", 1), - new KafkaTopicPartition("test-topic", 2), - new KafkaTopicPartition("test-topic", 3), - new KafkaTopicPartition("test-topic2", 0), - new KafkaTopicPartition("test-topic2", 1)); - - List mockGetAllPartitionsForTopicsReturnOutOfOrder = - Arrays.asList( - new KafkaTopicPartition("test-topic", 3), - new KafkaTopicPartition("test-topic", 1), - new KafkaTopicPartition("test-topic2", 1), - new KafkaTopicPartition("test-topic", 0), - new KafkaTopicPartition("test-topic2", 0), - new KafkaTopicPartition("test-topic", 2)); - - for (int subtaskIndex = 0; subtaskIndex < numSubtasks; subtaskIndex++) { - TestPartitionDiscoverer partitionDiscoverer = - new TestPartitionDiscoverer( - topicsDescriptor, - subtaskIndex, - numSubtasks, - TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn( - Arrays.asList("test-topic", "test-topic2")), - TestPartitionDiscoverer - .createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn( - mockGetAllPartitionsForTopicsReturn)); - partitionDiscoverer.open(); - - TestPartitionDiscoverer partitionDiscovererOutOfOrder = - new TestPartitionDiscoverer( - topicsDescriptor, - subtaskIndex, - numSubtasks, - TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn( - Arrays.asList("test-topic", "test-topic2")), - TestPartitionDiscoverer - .createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn( - mockGetAllPartitionsForTopicsReturnOutOfOrder)); - partitionDiscovererOutOfOrder.open(); - - List discoveredPartitions = - partitionDiscoverer.discoverPartitions(); - List discoveredPartitionsOutOfOrder = - partitionDiscovererOutOfOrder.discoverPartitions(); - - // the subscribed partitions should be identical, regardless of the input partition - // ordering - Collections.sort(discoveredPartitions, new KafkaTopicPartition.Comparator()); - Collections.sort(discoveredPartitionsOutOfOrder, new KafkaTopicPartition.Comparator()); - assertThat(discoveredPartitionsOutOfOrder).isEqualTo(discoveredPartitions); - } - } - - @Test - public void testNonContiguousPartitionIdDiscovery() throws Exception { - List mockGetAllPartitionsForTopicsReturn1 = - Arrays.asList( - new KafkaTopicPartition("test-topic", 1), - new KafkaTopicPartition("test-topic", 4)); - - List mockGetAllPartitionsForTopicsReturn2 = - Arrays.asList( - new KafkaTopicPartition("test-topic", 0), - new KafkaTopicPartition("test-topic", 1), - new KafkaTopicPartition("test-topic", 2), - new KafkaTopicPartition("test-topic", 3), - new KafkaTopicPartition("test-topic", 4)); - - TestPartitionDiscoverer partitionDiscoverer = - new TestPartitionDiscoverer( - topicsDescriptor, - 0, - 1, - TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn( - Collections.singletonList("test-topic")), - // first metadata fetch has missing 
partitions that appears only in the - // second fetch; - // need to create new modifiable lists for each fetch, since internally - // Iterable.remove() is used. - Arrays.asList( - new ArrayList<>(mockGetAllPartitionsForTopicsReturn1), - new ArrayList<>(mockGetAllPartitionsForTopicsReturn2))); - partitionDiscoverer.open(); - - List discoveredPartitions1 = partitionDiscoverer.discoverPartitions(); - assertThat(discoveredPartitions1) - .hasSize(2) - .contains( - new KafkaTopicPartition("test-topic", 1), - new KafkaTopicPartition("test-topic", 4)); - - List discoveredPartitions2 = partitionDiscoverer.discoverPartitions(); - assertThat(discoveredPartitions2) - .hasSize(3) - .contains( - new KafkaTopicPartition("test-topic", 0), - new KafkaTopicPartition("test-topic", 2), - new KafkaTopicPartition("test-topic", 3)); - } - - private boolean contains(List partitions, int partition) { - for (KafkaTopicPartition ktp : partitions) { - if (ktp.getPartition() == partition) { - return true; - } - } - - return false; - } - - private List> deepClone(List> toClone) { - List> clone = new ArrayList<>(toClone.size()); - for (List partitionsToClone : toClone) { - List clonePartitions = new ArrayList<>(partitionsToClone.size()); - clonePartitions.addAll(partitionsToClone); - - clone.add(clonePartitions); - } - - return clone; - } - - /** - * Utility method that determines the expected subtask index a partition should be assigned to, - * depending on the start index and using the partition id as the offset from that start index - * in clockwise direction. - * - *

    The expectation is based on the distribution contract of {@link - * KafkaTopicPartitionAssigner#assign(KafkaTopicPartition, int)}. - */ - private static int getExpectedSubtaskIndex( - KafkaTopicPartition partition, int startIndex, int numSubtasks) { - return (startIndex + partition.getPartition()) % numSubtasks; - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/ClosableBlockingQueueTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/ClosableBlockingQueueTest.java deleted file mode 100644 index 8697b1486..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/ClosableBlockingQueueTest.java +++ /dev/null @@ -1,616 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.junit.Test; - -import java.util.ArrayList; -import java.util.List; -import java.util.Random; -import java.util.concurrent.atomic.AtomicReference; - -import static java.util.Arrays.asList; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.fail; - -/** Tests for the {@link ClosableBlockingQueue}. 
*/ -public class ClosableBlockingQueueTest { - - // ------------------------------------------------------------------------ - // single-threaded unit tests - // ------------------------------------------------------------------------ - - @Test - public void testCreateQueueHashCodeEquals() { - try { - ClosableBlockingQueue queue1 = new ClosableBlockingQueue<>(); - ClosableBlockingQueue queue2 = new ClosableBlockingQueue<>(22); - - assertThat(queue1.isOpen()).isTrue(); - assertThat(queue2.isOpen()).isTrue(); - assertThat(queue1.isEmpty()).isTrue(); - assertThat(queue2.isEmpty()).isTrue(); - assertThat(queue1.size()).isEqualTo(0); - assertThat(queue2.size()).isEqualTo(0); - - assertThat(queue1.hashCode()).isEqualTo(queue2.hashCode()); - //noinspection EqualsWithItself - assertThat(queue1.equals(queue1)).isTrue(); - //noinspection EqualsWithItself - assertThat(queue2.equals(queue2)).isTrue(); - assertThat(queue1.equals(queue2)).isTrue(); - - assertThat(queue1.toString()).isNotNull(); - assertThat(queue2.toString()).isNotNull(); - - List elements = new ArrayList<>(); - elements.add("a"); - elements.add("b"); - elements.add("c"); - - ClosableBlockingQueue queue3 = new ClosableBlockingQueue<>(elements); - ClosableBlockingQueue queue4 = - new ClosableBlockingQueue<>(asList("a", "b", "c")); - - assertThat(queue3.isOpen()).isTrue(); - assertThat(queue4.isOpen()).isTrue(); - assertThat(queue3.isEmpty()).isFalse(); - assertThat(queue4.isEmpty()).isFalse(); - assertThat(queue3.size()).isEqualTo(3); - assertThat(queue4.size()).isEqualTo(3); - - assertThat(queue3.hashCode()).isEqualTo(queue4.hashCode()); - //noinspection EqualsWithItself - assertThat(queue3.equals(queue3)).isTrue(); - //noinspection EqualsWithItself - assertThat(queue4.equals(queue4)).isTrue(); - assertThat(queue3.equals(queue4)).isTrue(); - - assertThat(queue3.toString()).isNotNull(); - assertThat(queue4.toString()).isNotNull(); - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } - } - - @Test - public void testCloseEmptyQueue() { - try { - ClosableBlockingQueue queue = new ClosableBlockingQueue<>(); - assertThat(queue.isOpen()).isTrue(); - assertThat(queue.close()).isTrue(); - assertThat(queue.isOpen()).isFalse(); - - assertThat(queue.addIfOpen("element")).isFalse(); - assertThat(queue.isEmpty()).isTrue(); - - try { - queue.add("some element"); - fail("should cause an exception"); - } catch (IllegalStateException ignored) { - // expected - } - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } - } - - @Test - public void testCloseNonEmptyQueue() { - try { - ClosableBlockingQueue queue = new ClosableBlockingQueue<>(asList(1, 2, 3)); - assertThat(queue.isOpen()).isTrue(); - - assertThat(queue.close()).isFalse(); - assertThat(queue.close()).isFalse(); - - queue.poll(); - - assertThat(queue.close()).isFalse(); - assertThat(queue.close()).isFalse(); - - queue.pollBatch(); - - assertThat(queue.close()).isTrue(); - assertThat(queue.isOpen()).isFalse(); - - assertThat(queue.addIfOpen(42)).isFalse(); - assertThat(queue.isEmpty()).isTrue(); - - try { - queue.add(99); - fail("should cause an exception"); - } catch (IllegalStateException ignored) { - // expected - } - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } - } - - @Test - public void testPeekAndPoll() { - try { - ClosableBlockingQueue queue = new ClosableBlockingQueue<>(); - - assertThat(queue.peek()).isNull(); - assertThat(queue.peek()).isNull(); - assertThat(queue.poll()).isNull(); - 
assertThat(queue.poll()).isNull(); - - assertThat(queue.size()).isEqualTo(0); - - queue.add("a"); - queue.add("b"); - queue.add("c"); - - assertThat(queue.size()).isEqualTo(3); - - assertThat(queue.peek()).isEqualTo("a"); - assertThat(queue.peek()).isEqualTo("a"); - assertThat(queue.peek()).isEqualTo("a"); - - assertThat(queue.size()).isEqualTo(3); - - assertThat(queue.poll()).isEqualTo("a"); - assertThat(queue.poll()).isEqualTo("b"); - - assertThat(queue.size()).isEqualTo(1); - - assertThat(queue.peek()).isEqualTo("c"); - assertThat(queue.peek()).isEqualTo("c"); - - assertThat(queue.poll()).isEqualTo("c"); - - assertThat(queue.size()).isEqualTo(0); - assertThat(queue.poll()).isNull(); - assertThat(queue.peek()).isNull(); - assertThat(queue.peek()).isNull(); - - assertThat(queue.close()).isTrue(); - - try { - queue.peek(); - fail("should cause an exception"); - } catch (IllegalStateException ignored) { - // expected - } - - try { - queue.poll(); - fail("should cause an exception"); - } catch (IllegalStateException ignored) { - // expected - } - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } - } - - @Test - public void testPollBatch() { - try { - ClosableBlockingQueue queue = new ClosableBlockingQueue<>(); - - assertThat(queue.pollBatch()).isNull(); - - queue.add("a"); - queue.add("b"); - - assertThat(queue.pollBatch()).isEqualTo(asList("a", "b")); - assertThat(queue.pollBatch()).isNull(); - - queue.add("c"); - - assertThat(queue.pollBatch()).containsExactly("c"); - assertThat(queue.pollBatch()).isNull(); - - assertThat(queue.close()).isTrue(); - - try { - queue.pollBatch(); - fail("should cause an exception"); - } catch (IllegalStateException ignored) { - // expected - } - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } - } - - @Test - public void testGetElementBlocking() { - try { - ClosableBlockingQueue queue = new ClosableBlockingQueue<>(); - - assertThat(queue.getElementBlocking(1)).isNull(); - assertThat(queue.getElementBlocking(3)).isNull(); - assertThat(queue.getElementBlocking(2)).isNull(); - - assertThat(queue.size()).isEqualTo(0); - - queue.add("a"); - queue.add("b"); - queue.add("c"); - queue.add("d"); - queue.add("e"); - queue.add("f"); - - assertThat(queue.size()).isEqualTo(6); - - assertThat(queue.getElementBlocking(99)).isEqualTo("a"); - assertThat(queue.getElementBlocking()).isEqualTo("b"); - - assertThat(queue.size()).isEqualTo(4); - - assertThat(queue.getElementBlocking(0)).isEqualTo("c"); - assertThat(queue.getElementBlocking(1000000)).isEqualTo("d"); - assertThat(queue.getElementBlocking()).isEqualTo("e"); - assertThat(queue.getElementBlocking(1786598)).isEqualTo("f"); - - assertThat(queue.size()).isEqualTo(0); - - assertThat(queue.getElementBlocking(1)).isNull(); - assertThat(queue.getElementBlocking(3)).isNull(); - assertThat(queue.getElementBlocking(2)).isNull(); - - assertThat(queue.close()).isTrue(); - - try { - queue.getElementBlocking(); - fail("should cause an exception"); - } catch (IllegalStateException ignored) { - // expected - } - - try { - queue.getElementBlocking(1000000000L); - fail("should cause an exception"); - } catch (IllegalStateException ignored) { - // expected - } - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } - } - - @Test - public void testGetBatchBlocking() { - try { - ClosableBlockingQueue queue = new ClosableBlockingQueue<>(); - - assertThat(queue.getBatchBlocking(1)).isEmpty(); - assertThat(queue.getBatchBlocking(3)).isEmpty(); - 
assertThat(queue.getBatchBlocking(2)).isEmpty(); - - queue.add("a"); - queue.add("b"); - - assertThat(queue.getBatchBlocking(900000009)).isEqualTo(asList("a", "b")); - - queue.add("c"); - queue.add("d"); - - assertThat(queue.getBatchBlocking()).isEqualTo(asList("c", "d")); - - assertThat(queue.getBatchBlocking(2)).isEmpty(); - - queue.add("e"); - - assertThat(queue.getBatchBlocking(0)).containsExactly("e"); - - queue.add("f"); - - assertThat(queue.getBatchBlocking(1000000000)).containsExactly("f"); - - assertThat(queue.size()).isEqualTo(0); - - assertThat(queue.getBatchBlocking(1)).isEmpty(); - assertThat(queue.getBatchBlocking(3)).isEmpty(); - assertThat(queue.getBatchBlocking(2)).isEmpty(); - - assertThat(queue.close()).isTrue(); - - try { - queue.getBatchBlocking(); - fail("should cause an exception"); - } catch (IllegalStateException ignored) { - // expected - } - - try { - queue.getBatchBlocking(1000000000L); - fail("should cause an exception"); - } catch (IllegalStateException ignored) { - // expected - } - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } - } - - // ------------------------------------------------------------------------ - // multi-threaded tests - // ------------------------------------------------------------------------ - - @Test - public void notifyOnClose() { - try { - final long oneYear = 365L * 24 * 60 * 60 * 1000; - - // test "getBatchBlocking()" - final ClosableBlockingQueue queue1 = new ClosableBlockingQueue<>(); - QueueCall call1 = - new QueueCall() { - @Override - public void call() throws Exception { - queue1.getBatchBlocking(); - } - }; - testCallExitsOnClose(call1, queue1); - - // test "getBatchBlocking()" - final ClosableBlockingQueue queue2 = new ClosableBlockingQueue<>(); - QueueCall call2 = - new QueueCall() { - @Override - public void call() throws Exception { - queue2.getBatchBlocking(oneYear); - } - }; - testCallExitsOnClose(call2, queue2); - - // test "getBatchBlocking()" - final ClosableBlockingQueue queue3 = new ClosableBlockingQueue<>(); - QueueCall call3 = - new QueueCall() { - @Override - public void call() throws Exception { - queue3.getElementBlocking(); - } - }; - testCallExitsOnClose(call3, queue3); - - // test "getBatchBlocking()" - final ClosableBlockingQueue queue4 = new ClosableBlockingQueue<>(); - QueueCall call4 = - new QueueCall() { - @Override - public void call() throws Exception { - queue4.getElementBlocking(oneYear); - } - }; - testCallExitsOnClose(call4, queue4); - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } - } - - @SuppressWarnings("ThrowableResultOfMethodCallIgnored") - @Test - public void testMultiThreadedAddGet() { - try { - final ClosableBlockingQueue queue = new ClosableBlockingQueue<>(); - final AtomicReference pushErrorRef = new AtomicReference<>(); - final AtomicReference pollErrorRef = new AtomicReference<>(); - - final int numElements = 2000; - - Thread pusher = - new Thread("pusher") { - - @Override - public void run() { - try { - final Random rnd = new Random(); - for (int i = 0; i < numElements; i++) { - queue.add(i); - - // sleep a bit, sometimes - int sleepTime = rnd.nextInt(3); - if (sleepTime > 1) { - Thread.sleep(sleepTime); - } - } - - while (true) { - if (queue.close()) { - break; - } else { - Thread.sleep(5); - } - } - } catch (Throwable t) { - pushErrorRef.set(t); - } - } - }; - pusher.start(); - - Thread poller = - new Thread("poller") { - - @SuppressWarnings("InfiniteLoopStatement") - @Override - public void run() { - try { - int count = 0; - 
- try { - final Random rnd = new Random(); - int nextExpected = 0; - - while (true) { - int getMethod = count % 7; - switch (getMethod) { - case 0: - { - Integer next = queue.getElementBlocking(1); - if (next != null) { - assertThat(next.intValue()) - .isEqualTo(nextExpected); - nextExpected++; - count++; - } - break; - } - case 1: - { - List nextList = - queue.getBatchBlocking(); - for (Integer next : nextList) { - assertThat(next).isNotNull(); - assertThat(next.intValue()) - .isEqualTo(nextExpected); - nextExpected++; - count++; - } - break; - } - case 2: - { - List nextList = - queue.getBatchBlocking(1); - if (nextList != null) { - for (Integer next : nextList) { - assertThat(next).isNotNull(); - assertThat(next.intValue()) - .isEqualTo(nextExpected); - nextExpected++; - count++; - } - } - break; - } - case 3: - { - Integer next = queue.poll(); - if (next != null) { - assertThat(next.intValue()) - .isEqualTo(nextExpected); - nextExpected++; - count++; - } - break; - } - case 4: - { - List nextList = queue.pollBatch(); - if (nextList != null) { - for (Integer next : nextList) { - assertThat(next).isNotNull(); - assertThat(next.intValue()) - .isEqualTo(nextExpected); - nextExpected++; - count++; - } - } - break; - } - default: - { - Integer next = queue.getElementBlocking(); - assertThat(next).isNotNull(); - assertThat(next.intValue()) - .isEqualTo(nextExpected); - nextExpected++; - count++; - } - } - - // sleep a bit, sometimes - int sleepTime = rnd.nextInt(3); - if (sleepTime > 1) { - Thread.sleep(sleepTime); - } - } - } catch (IllegalStateException e) { - // we get this once the queue is closed - assertThat(count).isEqualTo(numElements); - } - } catch (Throwable t) { - pollErrorRef.set(t); - } - } - }; - poller.start(); - - pusher.join(); - poller.join(); - - if (pushErrorRef.get() != null) { - Throwable t = pushErrorRef.get(); - t.printStackTrace(); - fail("Error in pusher: " + t.getMessage()); - } - if (pollErrorRef.get() != null) { - Throwable t = pollErrorRef.get(); - t.printStackTrace(); - fail("Error in poller: " + t.getMessage()); - } - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } - } - - // ------------------------------------------------------------------------ - // Utils - // ------------------------------------------------------------------------ - - private static void testCallExitsOnClose( - final QueueCall call, ClosableBlockingQueue queue) throws Exception { - - final AtomicReference errorRef = new AtomicReference<>(); - - Runnable runnable = - new Runnable() { - @Override - public void run() { - try { - call.call(); - } catch (Throwable t) { - errorRef.set(t); - } - } - }; - - Thread thread = new Thread(runnable); - thread.start(); - Thread.sleep(100); - queue.close(); - thread.join(); - - @SuppressWarnings("ThrowableResultOfMethodCallIgnored") - Throwable cause = errorRef.get(); - assertThat(cause).isInstanceOf(IllegalStateException.class); - } - - private interface QueueCall { - void call() throws Exception; - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionTest.java index 30e651691..c28e4a688 100644 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionTest.java +++ 
b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicPartitionTest.java @@ -18,6 +18,7 @@ package org.apache.flink.streaming.connectors.kafka.internals; +import org.apache.kafka.common.TopicPartition; import org.junit.Test; import java.lang.reflect.Field; @@ -26,14 +27,14 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.fail; -/** Tests for the {@link KafkaTopicPartition}. */ +/** Tests for the {@link TopicPartition}. */ public class KafkaTopicPartitionTest { @Test public void validateUid() { Field uidField; try { - uidField = KafkaTopicPartition.class.getDeclaredField("serialVersionUID"); + uidField = TopicPartition.class.getDeclaredField("serialVersionUID"); uidField.setAccessible(true); } catch (NoSuchFieldException e) { fail("serialVersionUID is not defined"); @@ -48,7 +49,7 @@ public void validateUid() { // the UID has to be constant to make sure old checkpoints/savepoints can be read try { - assertThat(uidField.getLong(null)).isEqualTo(722083576322742325L); + assertThat(uidField.getLong(null)).isEqualTo(-613627415771699627L); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicsDescriptorTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicsDescriptorTest.java deleted file mode 100644 index bb029d85e..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/internals/KafkaTopicsDescriptorTest.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.internals; - -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.regex.Pattern; - -import static org.assertj.core.api.Assertions.assertThat; - -/** Tests for the {@link KafkaTopicsDescriptor}. 
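Illustrative aside, not part of the patch: the KafkaTopicPartitionTest hunk above re-points the serialVersionUID check at Kafka's own org.apache.kafka.common.TopicPartition and pins it to -613627415771699627L so that old checkpoints and savepoints remain readable. A compressed sketch of that reflective check, assuming the Kafka clients jar is on the classpath:

import java.lang.reflect.Field;

import org.apache.kafka.common.TopicPartition;

// Illustrative sketch only: read the private static serialVersionUID reflectively and
// compare it with the value pinned in the updated test above.
public final class SerialVersionUidSketch {

    public static void main(String[] args) throws Exception {
        Field uid = TopicPartition.class.getDeclaredField("serialVersionUID");
        uid.setAccessible(true);
        long actual = uid.getLong(null);
        System.out.println("serialVersionUID = " + actual
                + ", matches pinned value: " + (actual == -613627415771699627L));
    }
}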
*/ -@RunWith(Parameterized.class) -public class KafkaTopicsDescriptorTest { - - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList( - new Object[][] { - {"topic1", null, Arrays.asList("topic1", "topic2", "topic3"), true}, - {"topic1", null, Arrays.asList("topic2", "topic3"), false}, - {"topic1", Pattern.compile("topic[0-9]"), null, true}, - {"topicx", Pattern.compile("topic[0-9]"), null, false} - }); - } - - private String topic; - private Pattern topicPattern; - private List fixedTopics; - boolean expected; - - public KafkaTopicsDescriptorTest( - String topic, Pattern topicPattern, List fixedTopics, boolean expected) { - this.topic = topic; - this.topicPattern = topicPattern; - this.fixedTopics = fixedTopics; - this.expected = expected; - } - - @Test - public void testIsMatchingTopic() { - KafkaTopicsDescriptor topicsDescriptor = - new KafkaTopicsDescriptor(fixedTopics, topicPattern); - - assertThat(topicsDescriptor.isMatchingTopic(topic)).isEqualTo(expected); - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/shuffle/KafkaShuffleExactlyOnceITCase.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/shuffle/KafkaShuffleExactlyOnceITCase.java deleted file mode 100644 index 7d37f6c34..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/shuffle/KafkaShuffleExactlyOnceITCase.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.shuffle; - -import org.apache.flink.api.common.functions.MapFunction; -import org.apache.flink.api.common.restartstrategy.RestartStrategies; -import org.apache.flink.api.java.tuple.Tuple; -import org.apache.flink.api.java.tuple.Tuple3; -import org.apache.flink.streaming.api.TimeCharacteristic; -import org.apache.flink.streaming.api.datastream.KeyedStream; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.streaming.connectors.kafka.testutils.FailingIdentityMapper; -import org.apache.flink.streaming.connectors.kafka.testutils.ValidatingExactlyOnceSink; - -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.Timeout; - -import java.util.UUID; - -import static org.apache.flink.streaming.api.TimeCharacteristic.EventTime; -import static org.apache.flink.streaming.api.TimeCharacteristic.IngestionTime; -import static org.apache.flink.streaming.api.TimeCharacteristic.ProcessingTime; -import static org.apache.flink.test.util.TestUtils.tryExecute; - -/** Failure Recovery IT Test for KafkaShuffle. 
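Illustrative aside, not part of the patch: the deleted KafkaTopicsDescriptorTest above is parameterized over two mutually exclusive configurations, a fixed topic list or a topic regex, and asserts isMatchingTopic against both. A minimal stand-in for that matching rule, using the same parameter table (the descriptor class itself is not reproduced here):

import java.util.List;
import java.util.regex.Pattern;

// Illustrative sketch only: a topic matches either the fixed topic list or the regex
// pattern, whichever of the two was configured (exactly one of them is non-null).
public final class TopicMatchingSketch {

    static boolean isMatchingTopic(String topic, List<String> fixedTopics, Pattern topicPattern) {
        return fixedTopics != null
                ? fixedTopics.contains(topic)
                : topicPattern.matcher(topic).matches();
    }

    public static void main(String[] args) {
        System.out.println(isMatchingTopic("topic1", List.of("topic1", "topic2", "topic3"), null)); // true
        System.out.println(isMatchingTopic("topic1", List.of("topic2", "topic3"), null));           // false
        System.out.println(isMatchingTopic("topic1", null, Pattern.compile("topic[0-9]")));         // true
        System.out.println(isMatchingTopic("topicx", null, Pattern.compile("topic[0-9]")));         // false
    }
}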
*/ -public class KafkaShuffleExactlyOnceITCase extends KafkaShuffleTestBase { - - @Rule public final Timeout timeout = Timeout.millis(600000L); - - /** - * Failure Recovery after processing 2/3 data with time characteristic: ProcessingTime. - * - *

    Producer Parallelism = 1; Kafka Partition # = 1; Consumer Parallelism = 1. - */ - @Test - public void testFailureRecoveryProcessingTime() throws Exception { - testKafkaShuffleFailureRecovery(1000, ProcessingTime); - } - - /** - * Failure Recovery after processing 2/3 data with time characteristic: IngestionTime. - * - *

    Producer Parallelism = 1; Kafka Partition # = 1; Consumer Parallelism = 1. - */ - @Test - public void testFailureRecoveryIngestionTime() throws Exception { - testKafkaShuffleFailureRecovery(1000, IngestionTime); - } - - /** - * Failure Recovery after processing 2/3 data with time characteristic: EventTime. - * - *

    Producer Parallelism = 1; Kafka Partition # = 1; Consumer Parallelism = 1. - */ - @Test - public void testFailureRecoveryEventTime() throws Exception { - testKafkaShuffleFailureRecovery(1000, EventTime); - } - - /** - * Failure Recovery after data is repartitioned with time characteristic: ProcessingTime. - * - *

    Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3. - */ - @Test - public void testAssignedToPartitionFailureRecoveryProcessingTime() throws Exception { - testAssignedToPartitionFailureRecovery(500, ProcessingTime); - } - - /** - * Failure Recovery after data is repartitioned with time characteristic: IngestionTime. - * - *

    Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3. - */ - @Test - public void testAssignedToPartitionFailureRecoveryIngestionTime() throws Exception { - testAssignedToPartitionFailureRecovery(500, IngestionTime); - } - - /** - * Failure Recovery after data is repartitioned with time characteristic: EventTime. - * - *

    Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3. - */ - @Test - public void testAssignedToPartitionFailureRecoveryEventTime() throws Exception { - testAssignedToPartitionFailureRecovery(500, EventTime); - } - - /** - * To test failure recovery after processing 2/3 data. - * - *

    Schema: (key, timestamp, source instance Id). Producer Parallelism = 1; Kafka Partition # - * = 1; Consumer Parallelism = 1 - */ - private void testKafkaShuffleFailureRecovery( - int numElementsPerProducer, TimeCharacteristic timeCharacteristic) throws Exception { - - String topic = topic("failure_recovery-" + UUID.randomUUID(), timeCharacteristic); - final int numberOfPartitions = 1; - final int producerParallelism = 1; - final int failAfterElements = numElementsPerProducer * numberOfPartitions * 2 / 3; - - createTestTopic(topic, numberOfPartitions, 1); - - final StreamExecutionEnvironment env = - createEnvironment(producerParallelism, timeCharacteristic).enableCheckpointing(500); - - createKafkaShuffle( - env, - topic, - numElementsPerProducer, - producerParallelism, - timeCharacteristic, - numberOfPartitions) - .map(new FailingIdentityMapper<>(failAfterElements)) - .setParallelism(1) - .map(new ToInteger(producerParallelism)) - .setParallelism(1) - .addSink( - new ValidatingExactlyOnceSink(numElementsPerProducer * producerParallelism)) - .setParallelism(1); - - FailingIdentityMapper.failedBefore = false; - - tryExecute(env, topic); - - deleteTestTopic(topic); - } - - /** - * To test failure recovery with partition assignment after processing 2/3 data. - * - *

    Schema: (key, timestamp, source instance Id). Producer Parallelism = 2; Kafka Partition # - * = 3; Consumer Parallelism = 3 - */ - private void testAssignedToPartitionFailureRecovery( - int numElementsPerProducer, TimeCharacteristic timeCharacteristic) throws Exception { - String topic = topic("partition_failure_recovery-" + UUID.randomUUID(), timeCharacteristic); - final int numberOfPartitions = 3; - final int producerParallelism = 2; - final int failAfterElements = numElementsPerProducer * producerParallelism * 2 / 3; - - createTestTopic(topic, numberOfPartitions, 1); - - final StreamExecutionEnvironment env = - createEnvironment(producerParallelism, timeCharacteristic); - - KeyedStream, Tuple> keyedStream = - createKafkaShuffle( - env, - topic, - numElementsPerProducer, - producerParallelism, - timeCharacteristic, - numberOfPartitions); - keyedStream - .process( - new PartitionValidator( - keyedStream.getKeySelector(), numberOfPartitions, topic)) - .setParallelism(numberOfPartitions) - .map(new ToInteger(producerParallelism)) - .setParallelism(numberOfPartitions) - .map(new FailingIdentityMapper<>(failAfterElements)) - .setParallelism(1) - .addSink( - new ValidatingExactlyOnceSink(numElementsPerProducer * producerParallelism)) - .setParallelism(1); - - FailingIdentityMapper.failedBefore = false; - - tryExecute(env, topic); - - deleteTestTopic(topic); - } - - private StreamExecutionEnvironment createEnvironment( - int producerParallelism, TimeCharacteristic timeCharacteristic) { - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(producerParallelism); - env.setStreamTimeCharacteristic(timeCharacteristic); - env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0)); - env.setBufferTimeout(0); - env.enableCheckpointing(500); - - return env; - } - - private static class ToInteger implements MapFunction, Integer> { - private final int producerParallelism; - - ToInteger(int producerParallelism) { - this.producerParallelism = producerParallelism; - } - - @Override - public Integer map(Tuple3 element) throws Exception { - - return element.f0 * producerParallelism + element.f2; - } - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/shuffle/KafkaShuffleITCase.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/shuffle/KafkaShuffleITCase.java deleted file mode 100644 index 5505bdde3..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/shuffle/KafkaShuffleITCase.java +++ /dev/null @@ -1,543 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
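Illustrative aside, not part of the patch: the failure-recovery tests deleted above share one shape — compute a failure point at roughly two thirds of the expected element count, run the pipeline through a mapper that throws exactly once at that point, let Flink restart the job, and have a validating sink confirm every element is still observed exactly once. A toy, Flink-free model of that control flow (everything below is illustrative; the real tests delegate to Flink, FailingIdentityMapper and ValidatingExactlyOnceSink):

import java.util.HashSet;
import java.util.Set;

// Illustrative sketch only: one injected failure after ~2/3 of the input, one restart,
// and an exactly-once check at the end.
public final class FailOnceExactlyOnceSketch {

    public static void main(String[] args) {
        int numElements = 900;
        int failAfterElements = numElements * 2 / 3;
        boolean failedBefore = false;
        Set<Integer> committed = new HashSet<>();

        for (int attempt = 0; attempt < 2; attempt++) { // second iteration models the restart
            Set<Integer> inFlight = new HashSet<>();
            try {
                for (int i = 0; i < numElements; i++) {
                    if (!failedBefore && i >= failAfterElements) {
                        failedBefore = true;
                        throw new RuntimeException("injected failure");
                    }
                    inFlight.add(i);
                }
                committed.addAll(inFlight); // only a successful run "commits" its output
            } catch (RuntimeException ignored) {
                // output of the failed attempt is discarded, as a transactional sink would do
            }
        }
        System.out.println("exactly once: " + (committed.size() == numElements));
    }
}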
- */ - -package org.apache.flink.streaming.connectors.kafka.shuffle; - -import org.apache.flink.api.common.restartstrategy.RestartStrategies; -import org.apache.flink.api.common.typeinfo.BasicTypeInfo; -import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.api.java.tuple.Tuple; -import org.apache.flink.api.java.tuple.Tuple3; -import org.apache.flink.api.java.typeutils.TupleTypeInfo; -import org.apache.flink.streaming.api.TimeCharacteristic; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.datastream.KeyedStream; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.streaming.api.watermark.Watermark; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleElement; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleElementDeserializer; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleRecord; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaShuffleFetcher.KafkaShuffleWatermark; -import org.apache.flink.util.PropertiesUtil; - -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.Timeout; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.UUID; - -import static org.apache.flink.streaming.api.TimeCharacteristic.EventTime; -import static org.apache.flink.streaming.api.TimeCharacteristic.IngestionTime; -import static org.apache.flink.streaming.api.TimeCharacteristic.ProcessingTime; -import static org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle.PARTITION_NUMBER; -import static org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle.PRODUCER_PARALLELISM; -import static org.apache.flink.test.util.TestUtils.tryExecute; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.fail; - -/** Simple End to End Test for Kafka. */ -public class KafkaShuffleITCase extends KafkaShuffleTestBase { - - @Rule public final Timeout timeout = Timeout.millis(600000L); - - /** - * To test no data is lost or duplicated end-2-end with the default time characteristic: - * ProcessingTime. - * - *

    Producer Parallelism = 1; Kafka Partition # = 1; Consumer Parallelism = 1. - */ - @Test - public void testSimpleProcessingTime() throws Exception { - testKafkaShuffle(200000, ProcessingTime); - } - - /** - * To test no data is lost or duplicated end-2-end with time characteristic: IngestionTime. - * - *

    Producer Parallelism = 1; Kafka Partition # = 1; Consumer Parallelism = 1. - */ - @Test - public void testSimpleIngestionTime() throws Exception { - testKafkaShuffle(200000, IngestionTime); - } - - /** - * To test no data is lost or duplicated end-2-end with time characteristic: EventTime. - * - *

    Producer Parallelism = 1; Kafka Partition # = 1; Consumer Parallelism = 1. - */ - @Test - public void testSimpleEventTime() throws Exception { - testKafkaShuffle(100000, EventTime); - } - - /** - * To test data is partitioned to the right partition with time characteristic: ProcessingTime. - * - *

    Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3. - */ - @Test - public void testAssignedToPartitionProcessingTime() throws Exception { - testAssignedToPartition(300000, ProcessingTime); - } - - /** - * To test data is partitioned to the right partition with time characteristic: IngestionTime. - * - *

    Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3. - */ - @Test - public void testAssignedToPartitionIngestionTime() throws Exception { - testAssignedToPartition(300000, IngestionTime); - } - - /** - * To test data is partitioned to the right partition with time characteristic: EventTime. - * - *

    Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3. - */ - @Test - public void testAssignedToPartitionEventTime() throws Exception { - testAssignedToPartition(100000, EventTime); - } - - /** - * To test watermark is monotonically incremental with randomized watermark. - * - *

    Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3. - */ - @Test - public void testWatermarkIncremental() throws Exception { - testWatermarkIncremental(100000); - } - - /** - * To test value serialization and deserialization with time characteristic: ProcessingTime. - * - *

    Producer Parallelism = 1; Kafka Partition # = 1; Consumer Parallelism = 1. - */ - @Test - public void testSerDeProcessingTime() throws Exception { - testRecordSerDe(ProcessingTime); - } - - /** - * To test value and watermark serialization and deserialization with time characteristic: - * IngestionTime. - * - *

    Producer Parallelism = 1; Kafka Partition # = 1; Consumer Parallelism = 1. - */ - @Test - public void testSerDeIngestionTime() throws Exception { - testRecordSerDe(IngestionTime); - } - - /** - * To test value and watermark serialization and deserialization with time characteristic: - * EventTime. - * - *

    Producer Parallelism = 1; Kafka Partition # = 1; Consumer Parallelism = 1. - */ - @Test - public void testSerDeEventTime() throws Exception { - testRecordSerDe(EventTime); - } - - /** - * To test value and watermark serialization and deserialization with time characteristic: - * EventTime. - * - *

    Producer Parallelism = 1; Kafka Partition # = 1; Consumer Parallelism = 1. - */ - @Test - public void testWatermarkBroadcasting() throws Exception { - final int numberOfPartitions = 3; - final int producerParallelism = 2; - final int numElementsPerProducer = 1000; - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - Map>> results = - testKafkaShuffleProducer( - topic("test_watermark_broadcast-" + UUID.randomUUID(), EventTime), - env, - numberOfPartitions, - producerParallelism, - numElementsPerProducer, - EventTime); - TypeSerializer> typeSerializer = createTypeSerializer(env); - KafkaShuffleElementDeserializer deserializer = - new KafkaShuffleElementDeserializer<>(typeSerializer); - - // Records in a single partition are kept in order - for (int p = 0; p < numberOfPartitions; p++) { - Collection> records = results.get(p); - Map> watermarks = new HashMap<>(); - - for (ConsumerRecord consumerRecord : records) { - assertThat(consumerRecord.key()).isNull(); - KafkaShuffleElement element = deserializer.deserialize(consumerRecord); - if (element.isRecord()) { - KafkaShuffleRecord> record = element.asRecord(); - assertThat(INIT_TIMESTAMP + record.getValue().f0) - .isEqualTo(record.getValue().f1.longValue()); - assertThat(record.getValue().f1.longValue()) - .isEqualTo(record.getTimestamp().longValue()); - } else if (element.isWatermark()) { - KafkaShuffleWatermark watermark = element.asWatermark(); - watermarks.computeIfAbsent(watermark.getSubtask(), k -> new ArrayList<>()); - watermarks.get(watermark.getSubtask()).add(watermark); - } else { - fail("KafkaShuffleElement is either record or watermark"); - } - } - - // According to the setting how watermarks are generated in this ITTest, - // every producer task emits a watermark corresponding to each record + the - // end-of-event-time watermark. - // Hence each producer sub task generates `numElementsPerProducer + 1` watermarks. - // Each producer sub task broadcasts these `numElementsPerProducer + 1` watermarks to - // all partitions. - // Thus in total, each producer sub task emits `(numElementsPerProducer + 1) * - // numberOfPartitions` watermarks. - // From the consumer side, each partition receives `(numElementsPerProducer + 1) * - // producerParallelism` watermarks, - // with each producer sub task produces `numElementsPerProducer + 1` watermarks. - // Besides, watermarks from the same producer sub task should keep in order. - for (List subTaskWatermarks : watermarks.values()) { - int index = 0; - assertThat(subTaskWatermarks).hasSize(numElementsPerProducer + 1); - for (KafkaShuffleWatermark watermark : subTaskWatermarks) { - if (index == numElementsPerProducer) { - // the last element is the watermark that signifies end-of-event-time - assertThat(Watermark.MAX_WATERMARK.getTimestamp()) - .isEqualTo(watermark.getWatermark()); - } else { - assertThat(INIT_TIMESTAMP + index++).isEqualTo(watermark.getWatermark()); - } - } - } - } - } - - /** - * To test no data is lost or duplicated end-2-end. - * - *

    Schema: (key, timestamp, source instance Id). Producer Parallelism = 1; Kafka Partition # - * = 1; Consumer Parallelism = 1 - */ - private void testKafkaShuffle(int numElementsPerProducer, TimeCharacteristic timeCharacteristic) - throws Exception { - String topic = topic("test_simple-" + UUID.randomUUID(), timeCharacteristic); - final int numberOfPartitions = 1; - final int producerParallelism = 1; - - createTestTopic(topic, numberOfPartitions, 1); - - final StreamExecutionEnvironment env = - createEnvironment(producerParallelism, timeCharacteristic); - createKafkaShuffle( - env, - topic, - numElementsPerProducer, - producerParallelism, - timeCharacteristic, - numberOfPartitions) - .map( - new ElementCountNoMoreThanValidator( - numElementsPerProducer * producerParallelism)) - .setParallelism(1) - .map( - new ElementCountNoLessThanValidator( - numElementsPerProducer * producerParallelism)) - .setParallelism(1); - - tryExecute(env, topic); - - deleteTestTopic(topic); - } - - /** - * To test data is partitioned to the right partition. - * - *

    Schema: (key, timestamp, source instance Id). Producer Parallelism = 2; Kafka Partition # - * = 3; Consumer Parallelism = 3 - */ - private void testAssignedToPartition( - int numElementsPerProducer, TimeCharacteristic timeCharacteristic) throws Exception { - String topic = topic("test_assigned_to_partition-" + UUID.randomUUID(), timeCharacteristic); - final int numberOfPartitions = 3; - final int producerParallelism = 2; - - createTestTopic(topic, numberOfPartitions, 1); - - final StreamExecutionEnvironment env = - createEnvironment(producerParallelism, timeCharacteristic); - - KeyedStream, Tuple> keyedStream = - createKafkaShuffle( - env, - topic, - numElementsPerProducer, - producerParallelism, - timeCharacteristic, - numberOfPartitions); - keyedStream - .process( - new PartitionValidator( - keyedStream.getKeySelector(), numberOfPartitions, topic)) - .setParallelism(numberOfPartitions) - .map( - new ElementCountNoMoreThanValidator( - numElementsPerProducer * producerParallelism)) - .setParallelism(1) - .map( - new ElementCountNoLessThanValidator( - numElementsPerProducer * producerParallelism)) - .setParallelism(1); - - tryExecute(env, topic); - - deleteTestTopic(topic); - } - - /** - * To watermark from the consumer side always increase. - * - *

    Schema: (key, timestamp, source instance Id). Producer Parallelism = 2; Kafka Partition # - * = 3; Consumer Parallelism = 3 - */ - private void testWatermarkIncremental(int numElementsPerProducer) throws Exception { - TimeCharacteristic timeCharacteristic = EventTime; - String topic = topic("test_watermark_incremental-" + UUID.randomUUID(), timeCharacteristic); - final int numberOfPartitions = 3; - final int producerParallelism = 2; - - createTestTopic(topic, numberOfPartitions, 1); - - final StreamExecutionEnvironment env = - createEnvironment(producerParallelism, timeCharacteristic); - - KeyedStream, Tuple> keyedStream = - createKafkaShuffle( - env, - topic, - numElementsPerProducer, - producerParallelism, - timeCharacteristic, - numberOfPartitions, - true); - keyedStream - .process(new WatermarkValidator()) - .setParallelism(numberOfPartitions) - .map( - new ElementCountNoMoreThanValidator( - numElementsPerProducer * producerParallelism)) - .setParallelism(1) - .map( - new ElementCountNoLessThanValidator( - numElementsPerProducer * producerParallelism)) - .setParallelism(1); - - tryExecute(env, topic); - - deleteTestTopic(topic); - } - - private void testRecordSerDe(TimeCharacteristic timeCharacteristic) throws Exception { - final int numElementsPerProducer = 2000; - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - - // Records in a single partition are kept in order - Collection> records = - testKafkaShuffleProducer( - topic("test_serde-" + UUID.randomUUID(), timeCharacteristic), - env, - 1, - 1, - numElementsPerProducer, - timeCharacteristic) - .values() - .iterator() - .next(); - - switch (timeCharacteristic) { - case ProcessingTime: - // NonTimestampContext, no intermediate watermarks, and one end-of-event-time - // watermark - assertThat(records.size()).isEqualTo(numElementsPerProducer + 1); - break; - case IngestionTime: - // IngestionTime uses AutomaticWatermarkContext and it emits a watermark after every - // `watermarkInterval` - // with default interval 200, hence difficult to control the number of watermarks - break; - case EventTime: - // ManualWatermarkContext - // `numElementsPerProducer` records, `numElementsPerProducer` watermarks, and one - // end-of-event-time watermark - assertThat(records.size()).isEqualTo(numElementsPerProducer * 2 + 1); - break; - default: - fail("unknown TimeCharacteristic type"); - } - - TypeSerializer> typeSerializer = createTypeSerializer(env); - - KafkaShuffleElementDeserializer deserializer = - new KafkaShuffleElementDeserializer<>(typeSerializer); - - int recordIndex = 0; - int watermarkIndex = 0; - for (ConsumerRecord consumerRecord : records) { - assertThat(consumerRecord.key()).isNull(); - KafkaShuffleElement element = deserializer.deserialize(consumerRecord); - if (element.isRecord()) { - KafkaShuffleRecord> record = element.asRecord(); - switch (timeCharacteristic) { - case ProcessingTime: - assertThat(record.getTimestamp()).isNull(); - break; - case IngestionTime: - assertThat(record.getTimestamp()).isNotNull(); - break; - case EventTime: - assertThat(record.getValue().f1.longValue()) - .isEqualTo(record.getTimestamp().longValue()); - break; - default: - fail("unknown TimeCharacteristic type"); - } - assertThat(recordIndex).isEqualTo(record.getValue().f0.intValue()); - assertThat(INIT_TIMESTAMP + recordIndex) - .isEqualTo(record.getValue().f1.longValue()); - assertThat(0).isEqualTo(record.getValue().f2.intValue()); - recordIndex++; - } else if (element.isWatermark()) { - 
KafkaShuffleWatermark watermark = element.asWatermark(); - switch (timeCharacteristic) { - case ProcessingTime: - assertThat(watermark.getSubtask()).isEqualTo(0); - // the last element is the watermark that signifies end-of-event-time - assertThat(recordIndex).isEqualTo(numElementsPerProducer); - assertThat(Watermark.MAX_WATERMARK.getTimestamp()) - .isEqualTo(watermark.getWatermark()); - break; - case IngestionTime: - break; - case EventTime: - assertThat(watermark.getSubtask()).isEqualTo(0); - if (watermarkIndex == recordIndex) { - // the last element is the watermark that signifies end-of-event-time - assertThat(Watermark.MAX_WATERMARK.getTimestamp()) - .isEqualTo(watermark.getWatermark()); - } else { - assertThat(INIT_TIMESTAMP + watermarkIndex) - .isEqualTo(watermark.getWatermark()); - } - break; - default: - fail("unknown TimeCharacteristic type"); - } - watermarkIndex++; - } else { - fail("KafkaShuffleElement is either record or watermark"); - } - } - } - - private Map>> testKafkaShuffleProducer( - String topic, - StreamExecutionEnvironment env, - int numberOfPartitions, - int producerParallelism, - int numElementsPerProducer, - TimeCharacteristic timeCharacteristic) - throws Exception { - createTestTopic(topic, numberOfPartitions, 1); - - env.setParallelism(producerParallelism); - env.setRestartStrategy(RestartStrategies.noRestart()); - env.setStreamTimeCharacteristic(timeCharacteristic); - - DataStream> source = - env.addSource(new KafkaSourceFunction(numElementsPerProducer, false)) - .setParallelism(producerParallelism); - DataStream> input = - (timeCharacteristic == EventTime) - ? source.assignTimestampsAndWatermarks(new PunctuatedExtractor()) - .setParallelism(producerParallelism) - : source; - - Properties properties = kafkaServer.getStandardProperties(); - Properties kafkaProperties = PropertiesUtil.flatten(properties); - - kafkaProperties.setProperty(PRODUCER_PARALLELISM, String.valueOf(producerParallelism)); - kafkaProperties.setProperty(PARTITION_NUMBER, String.valueOf(numberOfPartitions)); - kafkaProperties.setProperty( - "key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer"); - kafkaProperties.setProperty( - "value.deserializer", - "org.apache.kafka.common.serialization.ByteArrayDeserializer"); - FlinkKafkaShuffle.writeKeyBy(input, topic, kafkaProperties, 0); - - env.execute("Write to " + topic); - - Map>> results = new HashMap<>(); - - kafkaServer - .getAllRecordsFromTopic(kafkaProperties, topic) - .forEach( - r -> { - final int partition = r.partition(); - if (!results.containsKey(partition)) { - results.put(partition, new ArrayList<>()); - } - results.get(partition).add(r); - }); - - deleteTestTopic(topic); - - return results; - } - - private StreamExecutionEnvironment createEnvironment( - int producerParallelism, TimeCharacteristic timeCharacteristic) { - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(producerParallelism); - env.setStreamTimeCharacteristic(timeCharacteristic); - env.setRestartStrategy(RestartStrategies.noRestart()); - - return env; - } - - private TypeSerializer> createTypeSerializer( - StreamExecutionEnvironment env) { - return new TupleTypeInfo>( - BasicTypeInfo.INT_TYPE_INFO, - BasicTypeInfo.LONG_TYPE_INFO, - BasicTypeInfo.INT_TYPE_INFO) - .createSerializer(env.getConfig()); - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/shuffle/KafkaShuffleTestBase.java 
b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/shuffle/KafkaShuffleTestBase.java deleted file mode 100644 index 064aebd7f..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/shuffle/KafkaShuffleTestBase.java +++ /dev/null @@ -1,313 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.shuffle; - -import org.apache.flink.api.common.functions.MapFunction; -import org.apache.flink.api.java.functions.KeySelector; -import org.apache.flink.api.java.tuple.Tuple; -import org.apache.flink.api.java.tuple.Tuple3; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.runtime.state.KeyGroupRange; -import org.apache.flink.runtime.state.KeyGroupRangeAssignment; -import org.apache.flink.streaming.api.TimeCharacteristic; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.datastream.KeyedStream; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; -import org.apache.flink.streaming.api.functions.KeyedProcessFunction; -import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; -import org.apache.flink.streaming.api.watermark.Watermark; -import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer; -import org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase; -import org.apache.flink.streaming.connectors.kafka.KafkaProducerTestBase; -import org.apache.flink.streaming.connectors.kafka.KafkaTestEnvironmentImpl; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartitionAssigner; -import org.apache.flink.test.util.SuccessException; -import org.apache.flink.util.Collector; - -import org.junit.BeforeClass; - -import java.util.Random; - -import static org.apache.flink.streaming.api.TimeCharacteristic.EventTime; - -/** Base Test Class for KafkaShuffle. 
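Illustrative aside, not part of the patch: the deleted testWatermarkBroadcasting above spells out its expected counts in comments — every producer subtask emits one watermark per record plus the final end-of-event-time watermark, and broadcasts each of them to every partition. The asserted numbers follow directly from three parameters; a sketch of that bookkeeping, with the values taken from the deleted test:

// Illustrative sketch only: the watermark counts asserted by the deleted
// testWatermarkBroadcasting, derived from its three parameters.
public final class WatermarkCountSketch {

    public static void main(String[] args) {
        int numElementsPerProducer = 1000;
        int producerParallelism = 2;
        int numberOfPartitions = 3;

        // one watermark per record plus the end-of-event-time watermark
        int watermarksPerSubtask = numElementsPerProducer + 1;
        // each producer subtask broadcasts its watermarks to every partition
        int emittedPerSubtask = watermarksPerSubtask * numberOfPartitions;
        // so each partition receives the per-subtask count from every producer subtask
        int receivedPerPartition = watermarksPerSubtask * producerParallelism;

        System.out.println("watermarks emitted per producer subtask: " + emittedPerSubtask);
        System.out.println("watermarks received per partition: " + receivedPerPartition);
    }
}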
*/ -public class KafkaShuffleTestBase extends KafkaConsumerTestBase { - static final long INIT_TIMESTAMP = System.currentTimeMillis(); - - @BeforeClass - public static void prepare() throws Exception { - KafkaProducerTestBase.prepare(); - ((KafkaTestEnvironmentImpl) kafkaServer) - .setProducerSemantic(FlinkKafkaProducer.Semantic.EXACTLY_ONCE); - } - - static class KafkaSourceFunction - extends RichParallelSourceFunction> { - private volatile boolean running = true; - private final int numElementsPerProducer; - private final boolean unBounded; - - KafkaSourceFunction(int numElementsPerProducer) { - this.numElementsPerProducer = numElementsPerProducer; - this.unBounded = true; - } - - KafkaSourceFunction(int numElementsPerProducer, boolean unBounded) { - this.numElementsPerProducer = numElementsPerProducer; - this.unBounded = unBounded; - } - - @Override - public void run(SourceContext> ctx) throws Exception { - long timestamp = INIT_TIMESTAMP; - int sourceInstanceId = getRuntimeContext().getIndexOfThisSubtask(); - for (int i = 0; i < numElementsPerProducer && running; i++) { - ctx.collect(new Tuple3<>(i, timestamp++, sourceInstanceId)); - } - - while (running && unBounded) { - Thread.sleep(100); - } - } - - @Override - public void cancel() { - running = false; - } - } - - static KeyedStream, Tuple> createKafkaShuffle( - StreamExecutionEnvironment env, - String topic, - int numElementsPerProducer, - int producerParallelism, - TimeCharacteristic timeCharacteristic, - int numberOfPartitions) { - return createKafkaShuffle( - env, - topic, - numElementsPerProducer, - producerParallelism, - timeCharacteristic, - numberOfPartitions, - false); - } - - static KeyedStream, Tuple> createKafkaShuffle( - StreamExecutionEnvironment env, - String topic, - int numElementsPerProducer, - int producerParallelism, - TimeCharacteristic timeCharacteristic, - int numberOfPartitions, - boolean randomness) { - DataStream> source = - env.addSource(new KafkaSourceFunction(numElementsPerProducer)) - .setParallelism(producerParallelism); - DataStream> input = - (timeCharacteristic == EventTime) - ? source.assignTimestampsAndWatermarks(new PunctuatedExtractor(randomness)) - .setParallelism(producerParallelism) - : source; - - return FlinkKafkaShuffle.persistentKeyBy( - input, - topic, - producerParallelism, - numberOfPartitions, - kafkaServer.getStandardProperties(), - 0); - } - - static class PunctuatedExtractor - implements AssignerWithPunctuatedWatermarks> { - private static final long serialVersionUID = 1L; - boolean randomness; - Random rnd = new Random(123); - - PunctuatedExtractor() { - randomness = false; - } - - PunctuatedExtractor(boolean randomness) { - this.randomness = randomness; - } - - @Override - public long extractTimestamp( - Tuple3 element, long previousTimestamp) { - return element.f1; - } - - @Override - public Watermark checkAndGetNextWatermark( - Tuple3 lastElement, long extractedTimestamp) { - long randomValue = randomness ? 
rnd.nextInt(10) : 0; - return new Watermark(extractedTimestamp + randomValue); - } - } - - static class PartitionValidator - extends KeyedProcessFunction< - Tuple, Tuple3, Tuple3> { - private final KeySelector, Tuple> keySelector; - private final int numberOfPartitions; - private final String topic; - private KeyGroupRange keyGroupRange; - - private int previousPartition; - - PartitionValidator( - KeySelector, Tuple> keySelector, - int numberOfPartitions, - String topic) { - this.keySelector = keySelector; - this.numberOfPartitions = numberOfPartitions; - this.topic = topic; - this.previousPartition = -1; - } - - @Override - public void open(Configuration parameters) throws Exception { - super.open(parameters); - this.keyGroupRange = - KeyGroupRangeAssignment.computeKeyGroupRangeForOperatorIndex( - getRuntimeContext().getMaxNumberOfParallelSubtasks(), - numberOfPartitions, - getRuntimeContext().getIndexOfThisSubtask()); - } - - @Override - public void processElement( - Tuple3 in, - Context ctx, - Collector> out) - throws Exception { - int expectedSubtask = - KeyGroupRangeAssignment.assignKeyToParallelOperator( - keySelector.getKey(in), numberOfPartitions, numberOfPartitions); - int expectedPartition = -1; - // This is how Kafka assign partition to subTask; - for (int i = 0; i < numberOfPartitions; i++) { - if (KafkaTopicPartitionAssigner.assign(topic, i, numberOfPartitions) - == expectedSubtask) { - expectedPartition = i; - } - } - int indexOfThisSubtask = getRuntimeContext().getIndexOfThisSubtask(); - - boolean rightAssignment = - (expectedSubtask == indexOfThisSubtask) - && keyGroupRange.contains( - KeyGroupRangeAssignment.assignToKeyGroup( - keySelector.getKey(in), - getRuntimeContext().getMaxNumberOfParallelSubtasks())); - boolean samePartition = - (expectedPartition != -1) - && ((previousPartition == expectedPartition) - || (previousPartition == -1)); - previousPartition = expectedPartition; - - if (!(rightAssignment && samePartition)) { - throw new Exception("Error: Kafka partition assignment error "); - } - out.collect(in); - } - } - - static class WatermarkValidator - extends KeyedProcessFunction< - Tuple, Tuple3, Tuple3> { - private long previousWatermark = Long.MIN_VALUE; // initial watermark get from timeService - - @Override - public void processElement( - Tuple3 in, - Context ctx, - Collector> out) - throws Exception { - - long watermark = ctx.timerService().currentWatermark(); - - // Notice that the timerService might not be updated if no new watermark has been - // emitted, hence equivalent - // watermark is allowed, strictly incremental check is done when fetching watermark from - // KafkaShuffleFetcher. - if (watermark < previousWatermark) { - throw new Exception( - "Error: watermark should always increase. 
current watermark : previous watermark [" - + watermark - + " : " - + previousWatermark - + "]"); - } - previousWatermark = watermark; - - out.collect(in); - } - } - - static class ElementCountNoLessThanValidator - implements MapFunction, Tuple3> { - private final int totalCount; - private int counter = 0; - - ElementCountNoLessThanValidator(int totalCount) { - this.totalCount = totalCount; - } - - @Override - public Tuple3 map(Tuple3 element) - throws Exception { - counter++; - - if (counter == totalCount) { - throw new SuccessException(); - } - - return element; - } - } - - static class ElementCountNoMoreThanValidator - implements MapFunction, Tuple3> { - private final int totalCount; - private int counter = 0; - - ElementCountNoMoreThanValidator(int totalCount) { - this.totalCount = totalCount; - } - - @Override - public Tuple3 map(Tuple3 element) - throws Exception { - counter++; - - if (counter > totalCount) { - throw new Exception("Error: number of elements more than expected"); - } - - return element; - } - } - - String topic(String prefix, TimeCharacteristic timeCharacteristic) { - return prefix + "_" + timeCharacteristic; - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaChangelogTableITCase.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaChangelogTableITCase.java index 632b74ac9..146ed2fa3 100644 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaChangelogTableITCase.java +++ b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaChangelogTableITCase.java @@ -21,11 +21,11 @@ import org.apache.flink.api.common.serialization.SerializationSchema; import org.apache.flink.api.common.serialization.SimpleStringSchema; import org.apache.flink.connector.base.DeliveryGuarantee; +import org.apache.flink.connector.kafka.sink.KafkaPartitioner; import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema; import org.apache.flink.connector.kafka.sink.KafkaSink; import org.apache.flink.streaming.api.datastream.DataStreamSource; import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkFixedPartitioner; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; import org.apache.flink.table.api.TableConfig; import org.apache.flink.table.api.TableResult; import org.apache.flink.table.api.config.ExecutionConfigOptions; @@ -457,7 +457,7 @@ public void testKafkaMaxwellChangelogSource() throws Exception { private void writeRecordsToKafka(String topic, List lines) throws Exception { DataStreamSource stream = env.fromCollection(lines); SerializationSchema serSchema = new SimpleStringSchema(); - FlinkKafkaPartitioner partitioner = new FlinkFixedPartitioner<>(); + KafkaPartitioner partitioner = new FlinkFixedPartitioner<>(); // the producer must not produce duplicates Properties producerProperties = getStandardProps(); diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactoryTest.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactoryTest.java index c1d796d08..98ccb5a07 100644 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactoryTest.java +++ b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactoryTest.java @@ -26,6 
+26,7 @@ import org.apache.flink.configuration.ConfigOptions; import org.apache.flink.configuration.Configuration; import org.apache.flink.connector.base.DeliveryGuarantee; +import org.apache.flink.connector.kafka.sink.KafkaPartitioner; import org.apache.flink.connector.kafka.sink.KafkaSink; import org.apache.flink.connector.kafka.source.KafkaSource; import org.apache.flink.connector.kafka.source.KafkaSourceOptions; @@ -42,9 +43,7 @@ import org.apache.flink.streaming.api.transformations.SourceTransformation; import org.apache.flink.streaming.connectors.kafka.config.BoundedMode; import org.apache.flink.streaming.connectors.kafka.config.StartupMode; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkFixedPartitioner; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; import org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.ScanStartupMode; import org.apache.flink.streaming.connectors.kafka.testutils.MockPartitionOffsetsRetriever; import org.apache.flink.table.api.DataTypes; @@ -191,9 +190,9 @@ public void testTableSource() { final DynamicTableSource actualSource = createTableSource(SCHEMA, getBasicSourceOptions()); final KafkaDynamicSource actualKafkaSource = (KafkaDynamicSource) actualSource; - final Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>(); - specificOffsets.put(new KafkaTopicPartition(TOPIC, PARTITION_0), OFFSET_0); - specificOffsets.put(new KafkaTopicPartition(TOPIC, PARTITION_1), OFFSET_1); + final Map<TopicPartition, Long> specificOffsets = new HashMap<>(); + specificOffsets.put(new TopicPartition(TOPIC, PARTITION_0), OFFSET_0); + specificOffsets.put(new TopicPartition(TOPIC, PARTITION_1), OFFSET_1); final DecodingFormat<DeserializationSchema<RowData>> valueDecodingFormat = new DecodingFormatMock(",", true); @@ -235,7 +234,7 @@ public void testTableSourceWithPattern() { }); final DynamicTableSource actualSource = createTableSource(SCHEMA, modifiedOptions); - final Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>(); + final Map<TopicPartition, Long> specificOffsets = new HashMap<>(); DecodingFormat<DeserializationSchema<RowData>> valueDecodingFormat = new DecodingFormatMock(",", true); @@ -1169,9 +1168,9 @@ public void testDiscoverPartitionByDefault() { props.putAll(KAFKA_SOURCE_PROPERTIES); // The default partition discovery interval is 5 minutes props.setProperty("partition.discovery.interval.ms", "300000"); - final Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>(); - specificOffsets.put(new KafkaTopicPartition(TOPIC, PARTITION_0), OFFSET_0); - specificOffsets.put(new KafkaTopicPartition(TOPIC, PARTITION_1), OFFSET_1); + final Map<TopicPartition, Long> specificOffsets = new HashMap<>(); + specificOffsets.put(new TopicPartition(TOPIC, PARTITION_0), OFFSET_0); + specificOffsets.put(new TopicPartition(TOPIC, PARTITION_1), OFFSET_1); final DecodingFormat<DeserializationSchema<RowData>> valueDecodingFormat = new DecodingFormatMock(",", true); // Test scan source equals @@ -1207,9 +1206,9 @@ public void testDisableDiscoverPartition() { props.putAll(KAFKA_SOURCE_PROPERTIES); // Disable discovery if the partition discovery interval is 0 minutes props.setProperty("partition.discovery.interval.ms", "0"); - final Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>(); - specificOffsets.put(new KafkaTopicPartition(TOPIC, PARTITION_0), OFFSET_0); - specificOffsets.put(new KafkaTopicPartition(TOPIC, PARTITION_1), OFFSET_1); + final Map<TopicPartition, Long> specificOffsets = new HashMap<>(); + specificOffsets.put(new TopicPartition(TOPIC, PARTITION_0), OFFSET_0); + specificOffsets.put(new TopicPartition(TOPIC, PARTITION_1), OFFSET_1); final DecodingFormat<DeserializationSchema<RowData>>
valueDecodingFormat = new DecodingFormatMock(",", true); // Test scan source equals @@ -1248,7 +1247,7 @@ private static KafkaDynamicSource createExpectedScanSource( @Nullable Pattern topicPattern, Properties properties, StartupMode startupMode, - Map<KafkaTopicPartition, Long> specificStartupOffsets, + Map<TopicPartition, Long> specificStartupOffsets, long startupTimestampMillis) { return new KafkaDynamicSource( physicalDataType, @@ -1280,7 +1279,7 @@ private static KafkaDynamicSink createExpectedSink( @Nullable List<String> topics, @Nullable Pattern topicPattern, Properties properties, - @Nullable FlinkKafkaPartitioner<RowData> partitioner, + @Nullable KafkaPartitioner<RowData> partitioner, DeliveryGuarantee deliveryGuarantee, @Nullable Integer parallelism, String transactionalIdPrefix) { diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaTableITCase.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaTableITCase.java index acd0550e4..6620d088a 100644 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaTableITCase.java +++ b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaTableITCase.java @@ -21,6 +21,7 @@ import org.apache.flink.api.common.restartstrategy.RestartStrategies; import org.apache.flink.configuration.Configuration; import org.apache.flink.configuration.CoreOptions; +import org.apache.flink.connector.kafka.sink.KafkaPartitioner; import org.apache.flink.core.execution.JobClient; import org.apache.flink.core.execution.SavepointFormatType; import org.apache.flink.runtime.jobgraph.SavepointConfigOptions; @@ -28,7 +29,6 @@ import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.functions.sink.SinkFunction; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; import org.apache.flink.table.api.TableResult; import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; import org.apache.flink.table.api.config.TableConfigOptions; @@ -1467,7 +1467,7 @@ private void testStartFromGroupOffsetsWithNoneResetStrategy() // -------------------------------------------------------------------------------------------- /** Extract the partition id from the row and set it on the record. */ - public static class TestPartitioner extends FlinkKafkaPartitioner<RowData> { + public static class TestPartitioner implements KafkaPartitioner<RowData> { private static final long serialVersionUID = 1L; private static final int PARTITION_ID_FIELD_IN_SCHEMA = 0; diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/DataGenerators.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/DataGenerators.java deleted file mode 100644 index 92978a783..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/DataGenerators.java +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.kafka.testutils; - -import org.apache.flink.api.common.restartstrategy.RestartStrategies; -import org.apache.flink.api.common.serialization.SimpleStringSchema; -import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema; -import org.apache.flink.api.common.typeinfo.BasicTypeInfo; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; -import org.apache.flink.streaming.api.operators.StreamSink; -import org.apache.flink.streaming.connectors.kafka.KafkaTestEnvironment; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkFixedPartitioner; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; -import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness; - -import java.util.Properties; -import java.util.Random; - -/** Test data generators. */ -@SuppressWarnings("serial") -public class DataGenerators { - - public static void generateRandomizedIntegerSequence( - StreamExecutionEnvironment env, - KafkaTestEnvironment testServer, - String topic, - final int numPartitions, - final int numElements, - final boolean randomizeOrder) - throws Exception { - env.setParallelism(numPartitions); - env.setRestartStrategy(RestartStrategies.noRestart()); - - DataStream stream = - env.addSource( - new RichParallelSourceFunction() { - - private volatile boolean running = true; - - @Override - public void run(SourceContext ctx) { - // create a sequence - int[] elements = new int[numElements]; - for (int i = 0, val = getRuntimeContext().getIndexOfThisSubtask(); - i < numElements; - i++, - val += - getRuntimeContext() - .getNumberOfParallelSubtasks()) { - - elements[i] = val; - } - - // scramble the sequence - if (randomizeOrder) { - Random rnd = new Random(); - for (int i = 0; i < elements.length; i++) { - int otherPos = rnd.nextInt(elements.length); - - int tmp = elements[i]; - elements[i] = elements[otherPos]; - elements[otherPos] = tmp; - } - } - - // emit the sequence - int pos = 0; - while (running && pos < elements.length) { - ctx.collect(elements[pos++]); - } - } - - @Override - public void cancel() { - running = false; - } - }); - - Properties props = new Properties(); - props.putAll( - KafkaUtils.getPropertiesFromBrokerList(testServer.getBrokerConnectionString())); - Properties secureProps = testServer.getSecureProperties(); - if (secureProps != null) { - props.putAll(testServer.getSecureProperties()); - } - // Ensure the producer enables idempotence. 
- props.putAll(testServer.getIdempotentProducerConfig()); - - stream = stream.rebalance(); - testServer.produceIntoKafka( - stream, - topic, - new TypeInformationSerializationSchema<>( - BasicTypeInfo.INT_TYPE_INFO, env.getConfig()), - props, - new FlinkKafkaPartitioner() { - @Override - public int partition( - Integer next, - byte[] serializedKey, - byte[] serializedValue, - String topic, - int[] partitions) { - return next % partitions.length; - } - }); - - env.execute("Scrambles int sequence generator"); - } - - // ------------------------------------------------------------------------ - - /** - * A generator that continuously writes strings into the configured topic. The generation is - * stopped if an exception occurs or {@link #shutdown()} is called. - */ - public static class InfiniteStringsGenerator extends Thread { - - private final KafkaTestEnvironment server; - - private final String topic; - - private volatile Throwable error; - - private volatile boolean running = true; - - public InfiniteStringsGenerator(KafkaTestEnvironment server, String topic) { - this.server = server; - this.topic = topic; - } - - @Override - public void run() { - // we manually feed data into the Kafka sink - - Properties producerProperties = - KafkaUtils.getPropertiesFromBrokerList(server.getBrokerConnectionString()); - producerProperties.setProperty("retries", "3"); - - StreamSink sink = - server.getProducerSink( - topic, - new SimpleStringSchema(), - producerProperties, - new FlinkFixedPartitioner<>()); - try (OneInputStreamOperatorTestHarness testHarness = - new OneInputStreamOperatorTestHarness<>(sink)) { - testHarness.open(); - - final StringBuilder bld = new StringBuilder(); - final Random rnd = new Random(); - - while (running) { - bld.setLength(0); - - int len = rnd.nextInt(100) + 1; - for (int i = 0; i < len; i++) { - bld.append((char) (rnd.nextInt(20) + 'a')); - } - - String next = bld.toString(); - testHarness.processElement(new StreamRecord<>(next)); - } - } catch (Throwable t) { - this.error = t; - } - } - - public void shutdown() { - this.running = false; - } - - public Throwable getError() { - return this.error; - } - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/TestPartitionDiscoverer.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/TestPartitionDiscoverer.java deleted file mode 100644 index 5731273f8..000000000 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/TestPartitionDiscoverer.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.connectors.kafka.testutils; - -import org.apache.flink.streaming.connectors.kafka.internals.AbstractPartitionDiscoverer; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; -import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicsDescriptor; - -import org.mockito.invocation.InvocationOnMock; -import org.mockito.stubbing.Answer; - -import java.util.ArrayList; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -/** - * Utility {@link AbstractPartitionDiscoverer} for tests that allows mocking the sequence of - * consecutive metadata fetch calls to Kafka. - */ -public class TestPartitionDiscoverer extends AbstractPartitionDiscoverer { - - private final KafkaTopicsDescriptor topicsDescriptor; - - private final List> mockGetAllTopicsReturnSequence; - private final List> mockGetAllPartitionsForTopicsReturnSequence; - - private int getAllTopicsInvokeCount = 0; - private int getAllPartitionsForTopicsInvokeCount = 0; - - public TestPartitionDiscoverer( - KafkaTopicsDescriptor topicsDescriptor, - int indexOfThisSubtask, - int numParallelSubtasks, - List> mockGetAllTopicsReturnSequence, - List> mockGetAllPartitionsForTopicsReturnSequence) { - - super(topicsDescriptor, indexOfThisSubtask, numParallelSubtasks); - - this.topicsDescriptor = topicsDescriptor; - this.mockGetAllTopicsReturnSequence = mockGetAllTopicsReturnSequence; - this.mockGetAllPartitionsForTopicsReturnSequence = - mockGetAllPartitionsForTopicsReturnSequence; - } - - @Override - protected List getAllTopics() { - assertThat(topicsDescriptor.isTopicPattern()).isTrue(); - return mockGetAllTopicsReturnSequence.get(getAllTopicsInvokeCount++); - } - - @Override - protected List getAllPartitionsForTopics(List topics) { - if (topicsDescriptor.isFixedTopics()) { - assertThat(topics).isEqualTo(topicsDescriptor.getFixedTopics()); - } else { - assertThat(topics) - .isEqualTo( - mockGetAllTopicsReturnSequence.get( - getAllPartitionsForTopicsInvokeCount - 1)); - } - return mockGetAllPartitionsForTopicsReturnSequence.get( - getAllPartitionsForTopicsInvokeCount++); - } - - @Override - protected void initializeConnections() { - // nothing to do - } - - @Override - protected void wakeupConnections() { - // nothing to do - } - - @Override - protected void closeConnections() { - // nothing to do - } - - // --------------------------------------------------------------------------------- - // Utilities to create mocked, fixed results for a sequences of metadata fetches - // --------------------------------------------------------------------------------- - - public static List> createMockGetAllTopicsSequenceFromFixedReturn( - final List fixed) { - @SuppressWarnings("unchecked") - List> mockSequence = mock(List.class); - when(mockSequence.get(anyInt())) - .thenAnswer( - new Answer>() { - @Override - public List answer(InvocationOnMock invocationOnMock) - throws Throwable { - return new ArrayList<>(fixed); - } - }); - - return mockSequence; - } - - public static List> - createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn( - final List fixed) { - @SuppressWarnings("unchecked") - List> mockSequence = mock(List.class); - when(mockSequence.get(anyInt())) - .thenAnswer( - new Answer>() { - @Override - public List answer( - InvocationOnMock invocationOnMock) throws Throwable { - return new ArrayList<>(fixed); - 
} - }); - - return mockSequence; - } -} diff --git a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/Tuple2FlinkPartitioner.java b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/Tuple2FlinkPartitioner.java index bc1db674e..df714a449 100644 --- a/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/Tuple2FlinkPartitioner.java +++ b/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/Tuple2FlinkPartitioner.java @@ -19,13 +19,13 @@ package org.apache.flink.streaming.connectors.kafka.testutils; import org.apache.flink.api.java.tuple.Tuple2; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; +import org.apache.flink.connector.kafka.sink.KafkaPartitioner; /** * Special partitioner that uses the first field of a 2-tuple as the partition, and that expects a * specific number of partitions. */ -public class Tuple2FlinkPartitioner extends FlinkKafkaPartitioner<Tuple2<Integer, Integer>> { +public class Tuple2FlinkPartitioner implements KafkaPartitioner<Tuple2<Integer, Integer>> { private static final long serialVersionUID = -3589898230375281549L; private final int expectedPartitions;
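The recurring one-line change in the hunks above (Tuple2FlinkPartitioner, TestPartitioner, the anonymous partitioner in the deleted DataGenerators) is the move of custom partitioners from the deprecated FlinkKafkaPartitioner base class to the new org.apache.flink.connector.kafka.sink.KafkaPartitioner interface. The sketch below shows what a migrated partitioner can look like; it assumes KafkaPartitioner keeps the same partition(record, key, value, targetTopic, partitions) contract and Serializable nature as the class it replaces (which the retained serialVersionUID fields suggest), and the class name FirstFieldPartitioner is illustrative, not taken from the code base.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.connector.kafka.sink.KafkaPartitioner;

// Illustrative sketch only: routes each record to the partition named by the tuple's first field,
// mirroring what Tuple2FlinkPartitioner does under the new interface.
public class FirstFieldPartitioner implements KafkaPartitioner<Tuple2<Integer, Integer>> {
    // Kept because the diff retains serialVersionUID after switching to the interface,
    // implying KafkaPartitioner is Serializable.
    private static final long serialVersionUID = 1L;

    private final int expectedPartitions;

    public FirstFieldPartitioner(int expectedPartitions) {
        this.expectedPartitions = expectedPartitions;
    }

    @Override
    public int partition(
            Tuple2<Integer, Integer> record,
            byte[] key,
            byte[] value,
            String targetTopic,
            int[] partitions) {
        if (partitions.length != expectedPartitions) {
            // Fail fast if the topic does not have the expected partition count.
            throw new IllegalArgumentException(
                    "Expected " + expectedPartitions + " partitions, found " + partitions.length);
        }
        return record.f0;
    }
}

Wiring such a partitioner into a KafkaSink would typically go through the KafkaRecordSerializationSchema builder; that wiring is outside the hunks shown here.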