Skip to content

Commit

Permalink
Upgrade DataFusion to 21.0.0 (#37)
Browse files Browse the repository at this point in the history
fix #33
  • Loading branch information
kou committed Apr 2, 2023
1 parent 8e5199f commit b433005
Show file tree
Hide file tree
Showing 12 changed files with 332 additions and 180 deletions.
10 changes: 6 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2022 Sutou Kouhei <kou@clear-code.com>
# Copyright 2022-2023 Sutou Kouhei <kou@clear-code.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -15,15 +15,15 @@
[package]
name = "datafusion-c"
description = "DataFusion C API"
version = "10.0.0"
version = "21.0.0"
homepage = "https://github.com/datafusion-contrib/datafusion-c"
repository = "https://github.com/datafusion-contrib/datafusion-c"
readme = "README.md"
authors = ["Sutou Kouhei <kou@clear-code.com>"]
license = "Apache-2.0"
keywords = ["arrow", "c"]
edition = "2021"
rust-version = "1.59"
rust-version = "1.64"
exclude = [
"/cpp/",
]
Expand All @@ -37,7 +37,9 @@ default = ["capi"]
capi = []

[dependencies]
datafusion = "10"
arrow = { version = "34", features = ["ffi", "prettyprint"] }
arrow-data = "34"
datafusion = "21"
libc = "0.2"
tokio = "1"

Expand Down
61 changes: 33 additions & 28 deletions datafusion-glib/csv-read-options.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2022 Sutou Kouhei <kou@clear-code.com>
* Copyright 2022-2023 Sutou Kouhei <kou@clear-code.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,6 +29,7 @@ G_BEGIN_DECLS
/* Instance-private data of GDFCSVReadOptions; obtained through
 * gdf_csv_read_options_get_instance_private(). */
typedef struct GDFCSVReadOptionsPrivate_ {
/* Underlying DataFusion C API options object that the
 * df_csv_read_options_*() calls operate on. */
DFCSVReadOptions *options;
/* Schema object held by reference; unreffed and reset to NULL in
 * dispose. */
GArrowSchema *schema;
/* Schema (column names and types) last accepted as the table partition
 * columns. Reffed by the setter on success, unreffed in dispose; NULL
 * when none is set. */
GArrowSchema *table_partition_columns;
/* File extension string.
 * NOTE(review): ownership and release are not visible in this view --
 * confirm against finalize. */
gchar *file_extension;
} GDFCSVReadOptionsPrivate;

Expand Down Expand Up @@ -61,6 +62,10 @@ gdf_csv_read_options_dispose(GObject *object)
g_object_unref(priv->schema);
priv->schema = NULL;
}
if (priv->table_partition_columns) {
g_object_unref(priv->table_partition_columns);
priv->table_partition_columns = NULL;
}
G_OBJECT_CLASS(gdf_csv_read_options_parent_class)->dispose(object);
}

Expand Down Expand Up @@ -336,9 +341,8 @@ gdf_csv_read_options_get_file_extension(GDFCSVReadOptions *options)
/**
* gdf_csv_read_options_set_table_partition_columns:
* @options: A #GDFCSVReadOptions.
* @columns: (array length=n_columns): Column names that are used for table
* @schema: Column names and their types that are used for table
* partition.
* @n_columns: The number of column names.
* @error: (nullable): Return location for a #GError or %NULL.
*
* Returns: %TRUE on success, %FALSE otherwise.
Expand All @@ -347,19 +351,36 @@ gdf_csv_read_options_get_file_extension(GDFCSVReadOptions *options)
*/
gboolean
gdf_csv_read_options_set_table_partition_columns(GDFCSVReadOptions *options,
const gchar **columns,
gsize n_columns,
GArrowSchema *schema,
GError **error)
{
GDFCSVReadOptionsPrivate *priv =
gdf_csv_read_options_get_instance_private(options);
if (priv->table_partition_columns == schema) {
return TRUE;
}
gpointer c_abi_schema = NULL;
if (schema) {
c_abi_schema = garrow_schema_export(schema, error);
if (!c_abi_schema) {
return FALSE;
}
}
DFError *df_error = NULL;
bool success =
df_csv_read_options_set_table_partition_columns(priv->options,
columns,
n_columns,
c_abi_schema,
&df_error);
if (!success) {
if (success) {
if (priv->table_partition_columns) {
g_object_unref(priv->table_partition_columns);
}
if (schema) {
priv->table_partition_columns = g_object_ref(schema);
} else {
priv->table_partition_columns = NULL;
}
} else {
g_set_error(error,
GDF_ERROR,
df_error_get_code(df_error),
Expand All @@ -374,33 +395,17 @@ gdf_csv_read_options_set_table_partition_columns(GDFCSVReadOptions *options,
* gdf_csv_read_options_get_table_partition_columns:
* @options: A #GDFCSVReadOptions.
*
* Returns: (transfer full) (nullable): The column names that are used for
* table partition.
*
* It should be freed by g_strfreev() when no longer needed.
* Returns: (transfer none) (nullable): The column names and their
* types that are used for table partition.
*
* Since: 10.0.0
*/
gchar **
GArrowSchema *
gdf_csv_read_options_get_table_partition_columns(GDFCSVReadOptions *options)
{
GDFCSVReadOptionsPrivate *priv =
gdf_csv_read_options_get_instance_private(options);
uintptr_t df_n_columns = 0;
char **df_columns =
df_csv_read_options_get_table_partition_columns(priv->options, &df_n_columns);
if (df_n_columns == 0) {
return NULL;
}
gchar **columns = g_new(gchar *, df_n_columns + 1);
uintptr_t i;
for (i = 0; i < df_n_columns; i++) {
columns[i] = g_strdup(df_columns[i]);
free(df_columns[i]);
}
columns[i] = NULL;
free(df_columns);
return columns;
return priv->table_partition_columns;
}

DFCSVReadOptions *
Expand Down
7 changes: 3 additions & 4 deletions datafusion-glib/csv-read-options.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2022 Sutou Kouhei <kou@clear-code.com>
* Copyright 2022-2023 Sutou Kouhei <kou@clear-code.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -58,11 +58,10 @@ gdf_csv_read_options_get_file_extension(GDFCSVReadOptions *options);
GDF_AVAILABLE_IN_10_0
gboolean
gdf_csv_read_options_set_table_partition_columns(GDFCSVReadOptions *options,
const gchar **columns,
gsize n_columns,
GArrowSchema *schema,
GError **error);
GDF_AVAILABLE_IN_10_0
gchar **
GArrowSchema *
gdf_csv_read_options_get_table_partition_columns(GDFCSVReadOptions *options);


Expand Down
107 changes: 77 additions & 30 deletions datafusion-glib/parquet-read-options.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2022 Sutou Kouhei <kou@clear-code.com>
* Copyright 2022-2023 Sutou Kouhei <kou@clear-code.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -28,6 +28,7 @@ G_BEGIN_DECLS

/* Instance-private data of GDFParquetReadOptions; obtained through
 * gdf_parquet_read_options_get_instance_private(). */
typedef struct GDFParquetReadOptionsPrivate_ {
/* Underlying DataFusion C API options object that the
 * df_parquet_read_options_*() calls operate on. */
DFParquetReadOptions *options;
/* Schema (column names and types) last accepted as the table partition
 * columns. Reffed by the setter on success, unreffed in dispose; NULL
 * when none is set. */
GArrowSchema *table_partition_columns;
/* File extension string.
 * NOTE(review): ownership and release are not visible in this view --
 * confirm against finalize. */
gchar *file_extension;
} GDFParquetReadOptionsPrivate;

Expand All @@ -50,6 +51,19 @@ gdf_parquet_read_options_finalize(GObject *object)
G_OBJECT_CLASS(gdf_parquet_read_options_parent_class)->finalize(object);
}

/* GObject dispose handler: releases the reference held on the table
 * partition columns schema (if any) and then chains up to the parent
 * class. The pointer is reset to NULL so a repeated dispose call does
 * not unref it again. */
static void
gdf_parquet_read_options_dispose(GObject *object)
{
  GDFParquetReadOptionsPrivate *private_data;

  private_data = gdf_parquet_read_options_get_instance_private(
    GDF_PARQUET_READ_OPTIONS(object));

  if (private_data->table_partition_columns != NULL) {
    g_object_unref(private_data->table_partition_columns);
    private_data->table_partition_columns = NULL;
  }

  G_OBJECT_CLASS(gdf_parquet_read_options_parent_class)->dispose(object);
}

static void
gdf_parquet_read_options_set_property(GObject *object,
guint prop_id,
Expand Down Expand Up @@ -109,6 +123,7 @@ gdf_parquet_read_options_class_init(GDFParquetReadOptionsClass *klass)
{
GObjectClass *gobject_class = G_OBJECT_CLASS(klass);
gobject_class->finalize = gdf_parquet_read_options_finalize;
gobject_class->dispose = gdf_parquet_read_options_dispose;
gobject_class->set_property = gdf_parquet_read_options_set_property;
gobject_class->get_property = gdf_parquet_read_options_get_property;

Expand Down Expand Up @@ -209,9 +224,8 @@ gdf_parquet_read_options_get_file_extension(GDFParquetReadOptions *options)
/**
* gdf_parquet_read_options_set_table_partition_columns:
* @options: A #GDFParquetReadOptions.
* @columns: (array length=n_columns): Column names that are used for table
* @schema: Column names and their types that are used for table
* partition.
* @n_columns: The number of column names.
* @error: (nullable): Return location for a #GError or %NULL.
*
* Returns: %TRUE on success, %FALSE otherwise.
Expand All @@ -221,19 +235,36 @@ gdf_parquet_read_options_get_file_extension(GDFParquetReadOptions *options)
gboolean
gdf_parquet_read_options_set_table_partition_columns(
GDFParquetReadOptions *options,
const gchar **columns,
gsize n_columns,
GArrowSchema *schema,
GError **error)
{
GDFParquetReadOptionsPrivate *priv =
gdf_parquet_read_options_get_instance_private(options);
if (priv->table_partition_columns == schema) {
return TRUE;
}
gpointer c_abi_schema = NULL;
if (schema) {
c_abi_schema = garrow_schema_export(schema, error);
if (!c_abi_schema) {
return FALSE;
}
}
DFError *df_error = NULL;
bool success =
df_parquet_read_options_set_table_partition_columns(priv->options,
columns,
n_columns,
c_abi_schema,
&df_error);
if (!success) {
if (success) {
if (priv->table_partition_columns) {
g_object_unref(priv->table_partition_columns);
}
if (schema) {
priv->table_partition_columns = g_object_ref(schema);
} else {
priv->table_partition_columns = NULL;
}
} else {
g_set_error(error,
GDF_ERROR,
df_error_get_code(df_error),
Expand All @@ -248,35 +279,51 @@ gdf_parquet_read_options_set_table_partition_columns(
* gdf_parquet_read_options_get_table_partition_columns:
* @options: A #GDFParquetReadOptions.
*
* Returns: (transfer full) (nullable): The column names that are used for
* table partition.
*
* It should be freed by g_strfreev() when no longer needed.
* Returns: (transfer none) (nullable): The column names and their
* types that are used for table partition.
*
* Since: 10.0.0
*/
gchar **
GArrowSchema *
gdf_parquet_read_options_get_table_partition_columns(
GDFParquetReadOptions *options)
{
GDFParquetReadOptionsPrivate *priv =
gdf_parquet_read_options_get_instance_private(options);
uintptr_t df_n_columns = 0;
char **df_columns =
df_parquet_read_options_get_table_partition_columns(priv->options,
&df_n_columns);
if (df_n_columns == 0) {
return NULL;
}
gchar **columns = g_new(gchar *, df_n_columns + 1);
uintptr_t i;
for (i = 0; i < df_n_columns; i++) {
columns[i] = g_strdup(df_columns[i]);
free(df_columns[i]);
}
columns[i] = NULL;
free(df_columns);
return columns;
gdf_parquet_read_options_get_instance_private(options);
return priv->table_partition_columns;
}

/**
 * gdf_parquet_read_options_unset_pruning:
 * @options: A #GDFParquetReadOptions.
 *
 * Unsets the pruning value of @options. When @options doesn't have a
 * pruning value, the value in the session is used.
 *
 * Since: 21.0.0
 */
void
gdf_parquet_read_options_unset_pruning(GDFParquetReadOptions *options)
{
  GDFParquetReadOptionsPrivate *private_data;

  private_data = gdf_parquet_read_options_get_instance_private(options);
  /* Delegate to the DataFusion C API on the wrapped options object. */
  df_parquet_read_options_unset_pruning(private_data->options);
}

/**
 * gdf_parquet_read_options_is_set_pruning:
 * @options: A #GDFParquetReadOptions.
 *
 * Returns: %TRUE if @options has a pruning value, %FALSE otherwise.
 *
 * Since: 21.0.0
 */
gboolean
gdf_parquet_read_options_is_set_pruning(GDFParquetReadOptions *options)
{
  GDFParquetReadOptionsPrivate *private_data;
  gboolean is_set;

  private_data = gdf_parquet_read_options_get_instance_private(options);
  /* Ask the DataFusion C API about the wrapped options object. */
  is_set = df_parquet_read_options_is_set_pruning(private_data->options);
  return is_set;
}

DFParquetReadOptions *
Expand Down
14 changes: 10 additions & 4 deletions datafusion-glib/parquet-read-options.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2022 Sutou Kouhei <kou@clear-code.com>
* Copyright 2022-2023 Sutou Kouhei <kou@clear-code.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -50,14 +50,20 @@ GDF_AVAILABLE_IN_10_0
gboolean
gdf_parquet_read_options_set_table_partition_columns(
GDFParquetReadOptions *options,
const gchar **columns,
gsize n_columns,
GArrowSchema *schema,
GError **error);
GDF_AVAILABLE_IN_10_0
gchar **
GArrowSchema *
gdf_parquet_read_options_get_table_partition_columns(
GDFParquetReadOptions *options);

GDF_AVAILABLE_IN_21_0
void
gdf_parquet_read_options_unset_pruning(GDFParquetReadOptions *options);

GDF_AVAILABLE_IN_21_0
gboolean
gdf_parquet_read_options_is_set_pruning(GDFParquetReadOptions *options);


G_END_DECLS
Loading

0 comments on commit b433005

Please sign in to comment.