Skip to content

Polars

The Polars plugin provides Schema and Field models and converters between Polars and Data Package notations.

Installation

An extra dependency needs to be installed:

pip install 'dplib-py[polars]'

Usage

Converting a Polars dataframe to the Data Package notation:

from dplib.plugins.polars.models import PolarsSchema

# `df` is an existing polars DataFrame; to_dp() returns its Table Schema
schema = PolarsSchema(df=df).to_dp()
print(schema.to_text(format='json'))

Converting from Data Package notation to Polars:

from dplib.models import Schema
from dplib.plugins.polars.models import PolarsSchema

# from_dp() builds a data-less DataFrame whose columns use the converted dtypes
schema = PolarsSchema.from_dp(Schema.from_path('data/schema.json'))
print(schema.df)

Reference

dplib.plugins.polars.models.PolarsSchema

Bases: Model

Polars Schema model

Source code in dplib/plugins/polars/models/schema.py
class PolarsSchema(Model, arbitrary_types_allowed=True):
    """Polars Schema model

    Wraps a Polars dataframe and converts its schema (column names and
    dtypes) to and from the Table Schema notation.
    """

    # Dataframe whose schema this model exposes and converts
    df: pl.DataFrame

    # Getters

    def get_field_names(self) -> List[str]:
        """Get field names

        Returns:
            Column names as listed by the dataframe schema
        """
        return list(self.df.schema.keys())

    def get_field_types(self) -> List[pl.PolarsDataType]:
        """Get field types

        Returns:
            Column dtypes as listed by the dataframe schema
        """
        return list(self.df.schema.values())

    # Converters

    def to_dp(self) -> Schema:
        """Convert to Table Schema

        Each dataframe column is converted through PolarsField.to_dp.

        Returns:
            Table Schema
        """
        schema = Schema()

        # Fields
        for name, dtype in self.df.schema.items():
            field = PolarsField(name=name, dtype=dtype).to_dp()
            schema.fields.append(field)

        return schema

    @classmethod
    def from_dp(cls, schema: Schema) -> PolarsSchema:
        """Create Polars Schema from Table Schema

        Each Table Schema field is converted through PolarsField.from_dp
        (which raises for fields without a name); the resulting dataframe
        is built from the schema only and holds no rows.

        Parameters:
            schema: Table Schema

        Returns:
            Polars Schema
        """
        columns: Dict[str, pl.PolarsDataType] = {}

        # Fields
        for field in schema.fields:
            polars_field = PolarsField.from_dp(field)
            columns[polars_field.name] = polars_field.dtype

        # Instantiate through cls so subclasses get instances of their own type
        return cls(df=pl.DataFrame(schema=columns))

df: pl.DataFrame instance-attribute

from_dp(schema) classmethod

Create Polars Schema from Table Schema

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| schema | Schema | Table Schema | required |

Returns:

| Type | Description |
| --- | --- |
| PolarsSchema | Polars Schema |

Source code in dplib/plugins/polars/models/schema.py
@classmethod
def from_dp(cls, schema: Schema) -> PolarsSchema:
    """Create Polars Schema from Table Schema

    Each Table Schema field is converted through PolarsField.from_dp;
    the resulting dataframe is built from the schema only and holds no rows.

    Parameters:
        schema: Table Schema

    Returns:
        Polars Schema
    """
    columns: Dict[str, pl.PolarsDataType] = {}

    # Fields
    for field in schema.fields:
        polars_field = PolarsField.from_dp(field)
        columns[polars_field.name] = polars_field.dtype

    # Instantiate through cls so subclasses get instances of their own type
    return cls(df=pl.DataFrame(schema=columns))

get_field_names()

Get field names

Source code in dplib/plugins/polars/models/schema.py
def get_field_names(self) -> List[str]:
    """Return the column names listed in the dataframe schema"""
    frame_schema = self.df.schema
    return [*frame_schema]

get_field_types()

Get field types

Source code in dplib/plugins/polars/models/schema.py
def get_field_types(self) -> List[pl.PolarsDataType]:
    """Return the column dtypes listed in the dataframe schema"""
    frame_schema = self.df.schema
    return [*frame_schema.values()]

to_dp()

Convert to Table Schema

Returns:

| Type | Description |
| --- | --- |
| Schema | Table Schema |

Source code in dplib/plugins/polars/models/schema.py
def to_dp(self) -> Schema:
    """Build a Table Schema from the dataframe columns

    Returns:
        Table Schema with one field per entry of the dataframe schema
    """
    table_schema = Schema()

    # Fields
    table_schema.fields.extend(
        PolarsField(name=column, dtype=column_type).to_dp()
        for column, column_type in self.df.schema.items()
    )

    return table_schema

dplib.plugins.polars.models.PolarsField

Bases: Model

Polars Field model

Source code in dplib/plugins/polars/models/field.py
class PolarsField(Model, arbitrary_types_allowed=True):
    """Polars Field model

    Pairs a column name with its Polars dtype and converts the pair
    to and from a Table Schema field.
    """

    name: str
    # NOTE(review): annotated as Any rather than pl.PolarsDataType;
    # the reason is not visible here -- confirm before narrowing the type
    dtype: Any

    # Converters

    def to_dp(self) -> models.IField:
        """Convert to Table Schema Field

        The dtype is checked against each group of Polars types in turn;
        the first group containing it decides the field class, and
        models.Field is used when no group matches.

        Returns:
            Table Schema Field
        """

        # Type (order matters: the first matching group wins)
        candidates = (
            (ARRAY_TYPES, models.ArrayField),
            (BOOLEAN_TYPES, models.BooleanField),
            (DATE_TYPES, models.DateField),
            (DATETIME_TYPES, models.DatetimeField),
            (DURATION_TYPES, models.DurationField),
            (INTEGER_TYPES, models.IntegerField),
            (NUMBER_TYPES, models.NumberField),
            (OBJECT_TYPES, models.ObjectField),
            (STRING_TYPES, models.StringField),
            (TIME_TYPES, models.TimeField),
        )
        Field = models.Field
        for polars_types, field_class in candidates:
            if self.dtype in polars_types:
                Field = field_class
                break

        # Name
        field = Field(name=self.name)

        return field

    @classmethod
    def from_dp(cls, field: models.IField) -> PolarsField:
        """Create Polars Field from Table Schema Field

        Parameters:
            field: Table Schema Field

        Returns:
            Polars Field

        Raises:
            Error: if the field has no name
        """
        if not field.name:
            raise Error(f"Field name is required to convert to polars: {field}")

        # Type
        dtype_by_type = {
            "array": pl.List,
            "boolean": pl.Boolean,
            "date": pl.Date,
            "datetime": pl.Datetime,
            "duration": pl.Duration,
            "geojson": pl.Struct,
            "geopoint": pl.List,
            "integer": pl.Int64,
            "number": pl.Decimal,
            "object": pl.Struct,
            "string": pl.Utf8,
            "time": pl.Time,
            # Int8 only spans -128..127, which cannot hold calendar years
            "year": pl.Int32,
            "yearmonth": pl.List,
        }
        # Any type not listed above falls back to a string column
        dtype = dtype_by_type.get(field.type, pl.Utf8)

        # Instantiate through cls so subclasses get instances of their own type
        return cls(name=field.name, dtype=dtype)

dtype: Any instance-attribute

name: str instance-attribute

from_dp(field) classmethod

Create Polars Field from Table Schema Field

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| field | IField | Table Schema Field | required |

Returns:

| Type | Description |
| --- | --- |
| PolarsField | Polars Field |

Source code in dplib/plugins/polars/models/field.py
@classmethod
def from_dp(cls, field: models.IField) -> PolarsField:
    """Create Polars Field from Table Schema Field

    Parameters:
        field: Table Schema Field

    Returns:
        Polars Field

    Raises:
        Error: if the field has no name
    """
    if not field.name:
        raise Error(f"Field name is required to convert to polars: {field}")

    # Type
    dtype_by_type = {
        "array": pl.List,
        "boolean": pl.Boolean,
        "date": pl.Date,
        "datetime": pl.Datetime,
        "duration": pl.Duration,
        "geojson": pl.Struct,
        "geopoint": pl.List,
        "integer": pl.Int64,
        "number": pl.Decimal,
        "object": pl.Struct,
        "string": pl.Utf8,
        "time": pl.Time,
        # Int8 only spans -128..127, which cannot hold calendar years
        "year": pl.Int32,
        "yearmonth": pl.List,
    }
    # Any type not listed above falls back to a string column
    dtype = dtype_by_type.get(field.type, pl.Utf8)

    # Instantiate through cls so subclasses get instances of their own type
    return cls(name=field.name, dtype=dtype)

to_dp()

Convert to Table Schema Field

Returns:

| Type | Description |
| --- | --- |
| IField | Table Schema Field |

Source code in dplib/plugins/polars/models/field.py
def to_dp(self) -> models.IField:
    """Convert to Table Schema Field

    The dtype is checked against each group of Polars types in turn;
    the first group containing it decides the field class, and
    models.Field is used when no group matches.

    Returns:
        Table Schema Field
    """

    # Type (order matters: the first matching group wins)
    candidates = (
        (ARRAY_TYPES, models.ArrayField),
        (BOOLEAN_TYPES, models.BooleanField),
        (DATE_TYPES, models.DateField),
        (DATETIME_TYPES, models.DatetimeField),
        (DURATION_TYPES, models.DurationField),
        (INTEGER_TYPES, models.IntegerField),
        (NUMBER_TYPES, models.NumberField),
        (OBJECT_TYPES, models.ObjectField),
        (STRING_TYPES, models.StringField),
        (TIME_TYPES, models.TimeField),
    )
    field_class = models.Field
    for polars_types, matched_class in candidates:
        if self.dtype in polars_types:
            field_class = matched_class
            break

    # Name
    return field_class(name=self.name)