/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.flink.maintenance.operator;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import java.io.File;
import java.io.IOException;
import java.time.Duration;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.connector.source.util.ratelimit.RateLimiterStrategy;
import org.apache.flink.configuration.CheckpointingOptions;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.execution.JobClient;
import org.apache.flink.runtime.client.JobExecutionException;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.hadoop.fs.Path;
import org.apache.iceberg.ContentFile;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.RewriteFiles;
import org.apache.iceberg.Table;
import org.apache.iceberg.data.GenericAppenderHelper;
import org.apache.iceberg.data.RandomGenericData;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.flink.TableLoader;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.awaitility.Awaitility;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

/**
 * Tests for {@link MonitorSource} and its {@link MonitorSource.TableChangeIterator}.
 *
 * <p>The iterator tests call {@code next()} directly, the source tests run a small Flink job and
 * collect the emitted {@link TableChange} events through a {@link CollectingSink}.
 */
class TestMonitorSource extends OperatorTestBase {
  private static final TableChange EMPTY_EVENT = TableChange.empty();
  // 100 permits per second
  private static final RateLimiterStrategy HIGH_RATE = RateLimiterStrategy.perSecond(100.0);
  // One permit every 10 000 seconds
  private static final RateLimiterStrategy LOW_RATE = RateLimiterStrategy.perSecond(1.0 / 10000.0);
  // Maximum time to wait for a single event to arrive at a collecting sink
  private static final Duration POLL_TIMEOUT = Duration.ofSeconds(5L);
  private static final String SOURCE_NAME = "TableChangeSource";

  @TempDir private File checkpointDir;

  /**
   * Checks that the iterator returns an empty event when there is nothing new, returns every new
   * commit exactly once, and merges several new commits into a single event.
   *
   * @param withDelete whether the table is created through {@code createTableWithDelete()}
   */
  @ParameterizedTest
  @ValueSource(booleans = {true, false})
  void testChangeReaderIterator(boolean withDelete) throws IOException {
    Table table = withDelete ? createTableWithDelete() : createTable();

    MonitorSource.TableChangeIterator iterator =
        new MonitorSource.TableChangeIterator(tableLoader(), null, Long.MAX_VALUE);

    // For an empty table we get an empty result
    assertThat(iterator.next()).isEqualTo(EMPTY_EVENT);

    // Add a single commit and get back the commit data in the event
    insert(table, 1, "a");
    TableChange expected = tableChangeWithLastSnapshot(table, TableChange.empty());
    assertThat(iterator.next()).isEqualTo(expected);
    // Make sure that consecutive calls do not return the data again
    assertThat(iterator.next()).isEqualTo(EMPTY_EVENT);

    // Add two more commits, but fetch the data in one loop
    insert(table, 2, "b");
    expected = tableChangeWithLastSnapshot(table, TableChange.empty());

    insert(table, 3, "c");
    expected = tableChangeWithLastSnapshot(table, expected);

    assertThat(iterator.next()).isEqualTo(expected);
    // Make sure that consecutive calls do not return the data again
    assertThat(iterator.next()).isEqualTo(EMPTY_EVENT);
  }

  /**
   * Create a table and check that the source returns the data as new commits arrive to the table.
   */
  @Test
  void testSource() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    Table table = createTable();
    DataStream<TableChange> events = monitorStream(env, tableLoader(), HIGH_RATE);

    // Sink to collect the results
    CollectingSink<TableChange> result = new CollectingSink<>();
    events.sinkTo(result);

    JobClient jobClient = null;
    try {
      // First result is an empty event
      jobClient = env.executeAsync("Table Change Source Test");
      assertThat(result.poll(POLL_TIMEOUT)).isEqualTo(EMPTY_EVENT);

      // Insert some data
      File dataDir = new File(new Path(table.location(), "data").toUri().getPath());
      // Fail here with a clear message instead of later inside the appender
      assertThat(dataDir.isDirectory() || dataDir.mkdirs())
          .as("Data directory %s should exist or be creatable", dataDir)
          .isTrue();
      GenericAppenderHelper dataAppender =
          new GenericAppenderHelper(table, FileFormat.PARQUET, dataDir.toPath());
      List<Record> batch1 = RandomGenericData.generate(table.schema(), 2, 1);
      dataAppender.appendToTable(batch1);

      // Wait until the changes are committed
      Awaitility.await()
          .until(
              () -> {
                table.refresh();
                return table.currentSnapshot() != null;
              });

      table.refresh();
      TableChange expected =
          TableChange.builder()
              .dataFileCount(1)
              .dataFileSizeInBytes(firstFileLength(table))
              .commitCount(1)
              .build();

      // Wait until the first non-empty event has arrived, and check the expected result
      Awaitility.await().until(() -> pollFirstNonEmptyEvent(result).equals(expected));
    } finally {
      closeJobClient(jobClient);
    }
  }

  /** Check that the {@link MonitorSource} operator state is restored correctly. */
  @Test
  void testStateRestore(@TempDir File savepointDir) throws Exception {
    Table table = createTable();
    insert(table, 1, "a");
    TableLoader tableLoader = tableLoader();

    Configuration config = new Configuration();
    config.set(CheckpointingOptions.CHECKPOINT_STORAGE, "filesystem");
    // File#toURI produces a well-formed file: URI, unlike prefixing the raw path with "file://"
    config.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, checkpointDir.toURI().toString());
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(config);
    env.enableCheckpointing(1000);

    // Sink to collect the results
    CollectingSink<TableChange> result = new CollectingSink<>();
    monitorStream(env, tableLoader, HIGH_RATE).sinkTo(result);

    // Start the job
    Configuration conf;
    JobClient jobClient = null;
    AtomicReference<TableChange> firstNonEmptyEvent = new AtomicReference<>();
    try {
      jobClient = env.executeAsync("Table Change Source Test");

      // Remember the first non-empty event, it is compared against the stateless run below
      Awaitility.await()
          .until(
              () -> {
                firstNonEmptyEvent.set(pollFirstNonEmptyEvent(result));
                return true;
              });
    } finally {
      // Stop with savepoint
      conf = closeJobClient(jobClient, savepointDir);
    }

    // Restore from savepoint, create the same topology with a different env
    env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
    CollectingSink<TableChange> resultWithSavepoint = new CollectingSink<>();
    monitorStream(env, tableLoader, LOW_RATE).sinkTo(resultWithSavepoint);

    // Make sure that the job with restored source does not read new records from the table
    JobClient clientWithSavepoint = null;
    try {
      clientWithSavepoint = env.executeAsync("Table Change Source test with savepoint");

      assertThat(resultWithSavepoint.poll(POLL_TIMEOUT)).isEqualTo(EMPTY_EVENT);
    } finally {
      closeJobClient(clientWithSavepoint, null);
    }

    // Restore without savepoint
    env = StreamExecutionEnvironment.getExecutionEnvironment();
    CollectingSink<TableChange> resultWithoutSavepoint = new CollectingSink<>();
    monitorStream(env, tableLoader, LOW_RATE).sinkTo(resultWithoutSavepoint);

    // Make sure that a new job without state reads the event as expected
    JobClient clientWithoutSavepoint = null;
    try {
      clientWithoutSavepoint = env.executeAsync("Table Change Source Test without savepoint");
      assertThat(resultWithoutSavepoint.poll(POLL_TIMEOUT)).isEqualTo(firstNonEmptyEvent.get());
    } finally {
      closeJobClient(clientWithoutSavepoint);
    }
  }

  /** Checks that running the source with a parallelism other than one fails the job. */
  @Test
  void testNotOneParallelismThrows() {
    createTable();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    env.fromSource(
            new MonitorSource(tableLoader(), HIGH_RATE, Long.MAX_VALUE),
            WatermarkStrategy.noWatermarks(),
            SOURCE_NAME)
        .setParallelism(2)
        .print();

    assertThatThrownBy(env::execute)
        .isInstanceOf(JobExecutionException.class)
        .rootCause()
        .isInstanceOf(IllegalArgumentException.class)
        .hasMessage("Parallelism should be set to 1");
  }

  /**
   * Checks that the {@code maxReadBack} argument of the iterator limits how many of the three
   * existing commits are reported by the first {@code next()} call.
   */
  @Test
  void testMaxReadBack() throws IOException {
    Table table = createTable();
    insert(table, 1, "a");
    insert(table, 2, "b");
    insert(table, 3, "c");

    TableLoader tableLoader = tableLoader();

    MonitorSource.TableChangeIterator iterator =
        new MonitorSource.TableChangeIterator(tableLoader, null, 1);

    // For a single maxReadBack we only get a single change
    assertThat(iterator.next().commitCount()).isEqualTo(1);

    iterator = new MonitorSource.TableChangeIterator(tableLoader, null, 2);

    // Expecting 2 commits/snapshots for maxReadBack=2
    assertThat(iterator.next().commitCount()).isEqualTo(2);

    iterator = new MonitorSource.TableChangeIterator(tableLoader, null, Long.MAX_VALUE);

    // For maxReadBack Long.MAX_VALUE we get every change
    assertThat(iterator.next().commitCount()).isEqualTo(3);
  }

  /** Checks that a snapshot created by a rewrite is not reported as a table change. */
  @Test
  void testSkipReplace() throws IOException {
    Table table = createTable();
    insert(table, 1, "a");

    TableLoader tableLoader = tableLoader();

    MonitorSource.TableChangeIterator iterator =
        new MonitorSource.TableChangeIterator(tableLoader, null, Long.MAX_VALUE);

    // Read the current snapshot
    assertThat(iterator.next().commitCount()).isEqualTo(1);

    // Create a DataOperations.REPLACE snapshot
    DataFile dataFile =
        table.snapshots().iterator().next().addedDataFiles(table.io()).iterator().next();
    RewriteFiles rewrite = tableLoader.loadTable().newRewrite();
    // Replace the file with itself for testing purposes
    rewrite.deleteFile(dataFile);
    rewrite.addFile(dataFile);
    rewrite.commit();

    // Check that the rewrite is ignored
    assertThat(iterator.next()).isEqualTo(EMPTY_EVENT);
  }

  /**
   * Builds the non-parallel {@link MonitorSource} stream shared by the job based tests.
   *
   * @param env environment the source is registered in
   * @param tableLoader loader of the monitored table
   * @param rateLimiter rate limiter strategy handed to the source
   * @return the stream of table changes emitted by the source
   */
  private static DataStream<TableChange> monitorStream(
      StreamExecutionEnvironment env, TableLoader tableLoader, RateLimiterStrategy rateLimiter) {
    return env.fromSource(
            new MonitorSource(tableLoader, rateLimiter, Long.MAX_VALUE),
            WatermarkStrategy.noWatermarks(),
            SOURCE_NAME)
        .forceNonParallel();
  }

  /**
   * Polls the sink, discarding every event equal to {@link #EMPTY_EVENT}, and returns the first
   * event which is different.
   *
   * @param sink sink collecting the events of the running job
   * @return the first non-empty event
   * @throws Exception whatever {@code CollectingSink#poll} throws
   */
  private static TableChange pollFirstNonEmptyEvent(CollectingSink<TableChange> sink)
      throws Exception {
    TableChange event = sink.poll(POLL_TIMEOUT);
    while (event.equals(EMPTY_EVENT)) {
      event = sink.poll(POLL_TIMEOUT);
    }

    return event;
  }

  /** Returns the size in bytes of the first data file added by the current snapshot. */
  private static long firstFileLength(Table table) {
    return table.currentSnapshot().addedDataFiles(table.io()).iterator().next().fileSizeInBytes();
  }

  /**
   * Returns a copy of {@code previous} merged with the change described by the current snapshot
   * of the table: its added data files, added delete files and a single commit.
   *
   * @param table table whose current snapshot is read
   * @param previous change accumulated so far, it is not modified
   * @return the accumulated change including the current snapshot
   */
  private static TableChange tableChangeWithLastSnapshot(Table table, TableChange previous) {
    List<DataFile> dataFiles =
        Lists.newArrayList(table.currentSnapshot().addedDataFiles(table.io()).iterator());
    List<DeleteFile> deleteFiles =
        Lists.newArrayList(table.currentSnapshot().addedDeleteFiles(table.io()).iterator());

    long dataSize = dataFiles.stream().mapToLong(ContentFile::fileSizeInBytes).sum();
    long deleteRecordCount = deleteFiles.stream().mapToLong(DeleteFile::recordCount).sum();

    TableChange newChange = previous.copy();
    newChange.merge(
        TableChange.builder()
            .dataFileCount(dataFiles.size())
            .dataFileSizeInBytes(dataSize)
            // Currently we only test with equality deletes
            .eqDeleteFileCount(deleteFiles.size())
            .eqDeleteRecordCount(deleteRecordCount)
            .commitCount(1)
            .build());
    return newChange;
  }
}
