package com.bjzhanghao;

import org.apache.hadoop.fs.Path;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;

import java.io.File;
import java.io.IOException;

/**
 * Timing demo for writing and reading Parquet files through
 * {@code WriteParquetDemo} and {@code ReadParquetDemo}.
 *
 * <p>The test makes no assertions about file content; it prints the elapsed
 * wall-clock time of each write/read to standard output and fails only if one
 * of the calls throws.
 */
public class ParquetDemoTest {

    /** Row count of the last (largest) file written; this is the file the read loops use. */
    private static final int MAX_ROWS = 3000;

    /** Number of columns written to every file, and the largest column count read back. */
    private static final int MAX_FIELDS = 500;

    @Rule
    public TemporaryFolder tempFolder = new TemporaryFolder();

    /**
     * Writes the demo file with increasing row counts, then reads the last
     * written file back with increasing column counts, printing each timing.
     *
     * @throws IOException if the temporary file cannot be created or deleted,
     *                     or if a write/read call fails
     */
    @org.junit.Test
    public void writeAndReadParquet() throws IOException {

        final String[] allFields = fieldNames(MAX_FIELDS);

        // Create the temporary file exactly once and derive the Hadoop path from it.
        // Calling TemporaryFolder.newFile with the same name a second time throws
        // an IOException because the file already exists.
        File parquetFile = tempFolder.newFile("demo.parquet");
        Path parquetPath = new Path(parquetFile.getPath());

        // Time writes for different row counts; the first entry is a warm-up run.
        // MAX_ROWS must stay last so the file left on disk matches the read messages below.
        int[] numRows = {100, 100, 500, 1000, 2000, MAX_ROWS};
        for (int rows : numRows) {
            // Remove the previous file before each write
            // (presumably the writer does not overwrite an existing file; verify against WriteParquetDemo).
            if (parquetFile.exists() && !parquetFile.delete()) {
                throw new IOException("Could not delete " + parquetFile);
            }
            long start = System.currentTimeMillis();
            new WriteParquetDemo().writeParquet(rows, allFields, parquetPath);
            System.out.println("写入Parquet文件（Group）, " + rows + " 行 x " + allFields.length + " 列, 耗时 " + (System.currentTimeMillis() - start) + " ms");
        }

        // Time reads with a read schema (column projection) for different column
        // counts; the first entry is a warm-up run.
        int[] numFields = {5, 5, 10, 15, 50, 100, 200, 300, 400, MAX_FIELDS};
        for (int count : numFields) {
            // Build the field list before starting the timer so only the read is measured.
            String[] fields = fieldNames(count);
            long start = System.currentTimeMillis();
            new ReadParquetDemo().readParquetWithReadSchema(parquetPath, fields);
            System.out.println("读取Parquet文件（过滤列）, " + MAX_ROWS + " 行 x " + fields.length + " 列, 耗时 " + (System.currentTimeMillis() - start) + " ms");
        }

        // Time reads without a read schema for the same column counts;
        // the first entry is a warm-up run.
        for (int count : numFields) {
            String[] fields = fieldNames(count);
            long start = System.currentTimeMillis();
            new ReadParquetDemo().readParquetNoReadSchema(parquetPath, fields);
            System.out.println("读取Parquet文件（未过滤列）, " + MAX_ROWS + " 行 x " + fields.length + " 列, 耗时 " + (System.currentTimeMillis() - start) + " ms");
        }

        System.out.println("Done");

    }

    /**
     * Builds the names {@code field0 .. field(count-1)}.
     *
     * @param count number of field names to generate
     * @return a new array of {@code count} field names
     */
    private static String[] fieldNames(int count) {
        String[] names = new String[count];
        for (int i = 0; i < names.length; i++) {
            names[i] = "field" + i;
        }
        return names;
    }

}