python - DeltaTable map type - Stack Overflow

时间: 2025-01-06 admin 业界

Using Spark, I can create a delta table with a map column type: MAP<STRING, TIMESTAMP>
How do I create a delta table with a map type without Spark?
I have tried multiple approaches and none of them are working.

import pyarrow as pa
from deltalake import write_deltalake

# Create a sample Arrow Table with a map type
data = {
    "id": pa.array([1, 2, 3]),
    "name": pa.array(["Alice", "Bob", "Charlie"]),
    "attributes": pa.array([
        pa.array([("age", 30)], type=pa.map_(pa.string(), pa.int32())),
        pa.array([("age", 25)], type=pa.map_(pa.string(), pa.int32())),
        pa.array([("age", 35)], type=pa.map_(pa.string(), pa.int32())),
    ])
}

# Create an Arrow Table
table = pa.Table.from_pydict(data)

# Define the path where the Delta table will be stored
delta_table_path = "./tmp/delta_map"

# Write the Arrow Table to a Delta table
write_deltalake(delta_table_path, data=table, mode="overwrite")

pyarrow throws: pyarrow.lib.ArrowTypeError: Could not convert 'a' with type str: was expecting tuple of (key, value) pair

from deltalake import Schema, Field, DeltaTable, WriterProperties, write_deltalake
from deltalake.schema import PrimitiveType, MapType

# Define the schema for the Delta table
schema = Schema([
    Field("id",PrimitiveType("string")),
    Field("data", MapType("integer", "string", value_contains_null=False))
])

# Create a list of data to write to the Delta table
data = [
    {"id": "1", "data": {"key1": "value1", "key2": "value2"}},
    {"id": "2", "data": {"key3": "value3", "key4": "value4"}}
]

# Create a Delta table
delta_table = write_deltalake(table_or_uri="./tmp/delta_map", data=data,
    schema=schema,mode="append",
    writer_properties=WriterProperties(compression="ZSTD")
)

# Write the data to the Delta table
delta_table.write_data(data)

deltalake throws: NotImplementedError: ArrowSchemaConversionMode.passthrough is not implemented to work with DeltaSchema, skip passing a schema or pass an arrow schema. Thx