この度データのhyper化とpublishを自動化する業務にたずさわったため記録する。
目的
hyperapiを駆使して、
pythonのデータフレームから、hyperファイルを作成する。
hyperapi:Tableau Hyper API
意義
hyperファイル:tableauの抽出ファイルの形式
pythonでhyperファイルを作成できると更新を自動化することができる。
自分の業務では、bigquerryからデータを取り出してそれを自動でhyper化するプロジェクトを開発した。
hyperファイルの作成
サンプルのデータを「customer.hyper」として実行したディレクトリに作成する。
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    TableName, \
    HyperException
# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau
extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(name='Customer ID', type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Customer Name', type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Loyalty Reward Points', type=SqlType.big_int(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Segment', type=SqlType.text(), nullability=NOT_NULLABLE)
#カラムの設定をする
#TableDefinition.Column(name='カラム名(何でもいい)', type='型定義', nullability=NOT_NULLABLE(NULLABLE))
    ]
)columns[]の中身にカラムを設定する。設定の仕方は以下の通り。
TableDefinition.Column(name=’カラム名(何でもいい)’, type=’型定義’, nullability=NOT_NULLABLE(もしくはNULLABLE))
型定義
整数 = SqlType.big_int() (公式より)
小数 = SqlType.double() (floatはダメだった)
文字列 = SqlType.text()
日付 = SqlType.date()
def run_insert_data_into_single_table():
    """
    An example demonstrating a simple single-table Hyper file including table creation and data insertion with different types
    """
    print("EXAMPLE - Insert data into a single table within a new Hyper file")
    path_to_database = Path("customer.hyper")
    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Creates new Hyper file "customer.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)
            # The rows to insert into the "Extract"."Extract" table.
            data_to_insert = [
                ["DJ-13375", "Dennis Kane", 518, "Consumer"],
                ["EB-13705", "Ed Braxton", 815, "Corporate"],
                ["cm-001", "rei tamai", 777, "Vice President"],
            ]
            with Inserter(connection, extract_table) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()
            # The table names in the "Extract" schema (the default schema).
            table_names = connection.catalog.get_table_names("Extract")
            print(f"Tables available in {path_to_database} are: {table_names}")
            # Number of rows in the "Extract"."Extract" table.
            # `execute_scalar_query` is for executing a query that returns exactly one row with one column.
            row_count = connection.execute_scalar_query(query=f"SELECT COUNT(*) FROM {extract_table.table_name}")
            print(f"The number of rows in table {extract_table.table_name} is {row_count}.")
        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
if __name__ == '__main__':
    try:
        run_insert_data_into_single_table()
    except HyperException as ex:
        print(ex)
        exit(1)
data_to_insert[]にデータを格納する。
格納するデータは
 nmp = data_frame.to_numpy().tolist()でデータフレームから作れる。
その場合
data_to_insert = nmpと定義すれば機能してくれる。
PS C:\Users\techn\Documents\program\python> python extract.hyper\geography3.py
The HyperProcess has started.
The connection to the Hyper file is open.
The geo_table is defined.
The data was added to the table.
The connection to the Hyper extract file is closed.
The HyperProcess has shut down.こんな感じのが出ればOK!

hyperファイルの更新
イメージは
- 中身のデータを削除
- 新しいデータを挿入
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    TableName, \
    HyperException
# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau
extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(name='Customer ID', type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Customer Name', type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Loyalty Reward Points', type=SqlType.big_int(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Segment', type=SqlType.text(), nullability=NOT_NULLABLE)
#カラムの設定をする
#TableDefinition.Column(name='カラム名(何でもいい)', type='型定義', nullability=NOT_NULLABLE(NULLABLE))
    ]
)
def run_insert_data_into_single_table():
    """
    An example demonstrating a simple single-table Hyper file including table creation and data insertion with different types
    """
    print("EXAMPLE - Insert data into a single table within a new Hyper file")
    path_to_database = Path("customer.hyper")
    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Creates new Hyper file "customer.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.NONE) as connection:
          #createmode をNONEにする
          
          #ここでデータを消す
             table_name = TableName("Extract", "Extract")
             row_count = connection.execute_command(
                command=f"DELETE FROM {table_name} "
                )
           # connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
           # connection.catalog.create_table(table_definition=extract_table)
           # この部分を消す
            # The rows to insert into the "Extract"."Extract" table.
            data_to_insert = [
                ["DJ-13375", "Dennis Kane", 518, "Consumer"],
                ["EB-13705", "Ed Braxton", 815, "Corporate"],
                ["cm-001", "rei tamai", 777, "Vice President"],
            ]
            with Inserter(connection, extract_table) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()
            # The table names in the "Extract" schema (the default schema).
            table_names = connection.catalog.get_table_names("Extract")
            print(f"Tables available in {path_to_database} are: {table_names}")
            # Number of rows in the "Extract"."Extract" table.
            # `execute_scalar_query` is for executing a query that returns exactly one row with one column.
            row_count = connection.execute_scalar_query(query=f"SELECT COUNT(*) FROM {extract_table.table_name}")
            print(f"The number of rows in table {extract_table.table_name} is {row_count}.")
        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
if __name__ == '__main__':
    try:
        run_insert_data_into_single_table()
    except HyperException as ex:
        print(ex)
        exit(1)最後に
これで自由にhyperファイルを作成できるようになりました。
DMP構築のご相談
お気軽にお問い合わせください


