Exporting Pigment metadata using the Metadata API

Forum|Forum|5 months ago
September 23, 2025
2 replies
112 views

DamianAtPigment
Employee

Pigment Metadata Export

This Python script connects to the Pigment Metadata API to fetch a full hierarchy of Workspaces, Applications, Blocks, Views, and Import Configurations. It then compiles this metadata into a single CSV file to clearly show the relationships between all entities.

This can be very useful for Solution Architects and Modellers who need an overview of all the objects contained in a workspace or application.

Please read this Community article for more information on the Pigment Metadata API.

Python metadata extraction script (metadata_extract.py)

#!/usr/bin/env python3
"""A command-line tool to fetch and export a comprehensive Pigment metadata report.

This script connects to the Pigment API to fetch a full hierarchy of Workspaces,
Applications, Blocks, Views, and Import Configurations. It then denormalizes and
compiles this metadata into a single, comprehensive CSV file to clearly show the
relationships between all entities.
"""
import os
import csv
import logging
import sys
import argparse
import itertools
from typing import Generator, Any, Dict, List
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

# --- 1. Configure Logging ---
class TqdmLoggingHandler(logging.Handler):
    """Logging handler that emits records through ``tqdm.write()``.

    Each formatted record is handed to ``tqdm.write()`` instead of being
    written to a stream directly, so log lines do not collide with an active
    progress bar.
    """

    def __init__(self, level=logging.NOTSET):
        super().__init__(level)

    def emit(self, record):
        """Format *record*, print it via ``tqdm.write()`` and flush the handler."""
        try:
            tqdm.write(self.format(record))
            self.flush()
        except (KeyboardInterrupt, SystemExit):
            # Never swallow interpreter-exit signals.
            raise
        except Exception:
            # Any other failure is delegated to logging's standard error hook.
            self.handleError(record)

# Configure the root logger: INFO level, a single tqdm-aware handler.
log = logging.getLogger()
log.setLevel(logging.INFO)

# Detach whatever handlers are already attached (iterate over a copy because
# removeHandler mutates log.handlers).
for handler in list(log.handlers):
    log.removeHandler(handler)

formatter = logging.Formatter(
    '%(asctime)s - [%(levelname)s] - %(message)s',
    '%Y-%m-%d %H:%M:%S',
)
handler = TqdmLoggingHandler()
handler.setFormatter(formatter)
log.addHandler(handler)


# --- 2. API Client Class ---
class PigmentMetadataClient:
    """A client for interacting with the Pigment Metadata API.

    All requests go through one ``requests.Session`` that retries on the
    status codes configured in ``_create_retry_session``. Fetch failures are
    logged and reported to the caller as an empty list rather than raised.
    """
    API_BASE_URL = 'https://pigment.app/api/v1'
    # (connect, read) timeouts in seconds. Without a timeout, requests waits
    # indefinitely on a stalled connection, which would hang the caller.
    REQUEST_TIMEOUT = (10, 60)

    def __init__(self, api_key: str):
        """Create a client that authenticates with *api_key* as a Bearer token.

        Raises:
            ValueError: if *api_key* is empty or None.
        """
        if not api_key:
            raise ValueError("API key is required.")
        self._headers = {'Authorization': f"Bearer {api_key}"}
        self._session = self._create_retry_session()

    def _create_retry_session(self) -> requests.Session:
        """Return a session whose HTTPS adapter retries up to 3 times with backoff."""
        session = requests.Session()
        retry_strategy = Retry(total=3, status_forcelist=[429, 500, 502, 503, 504], backoff_factor=1)
        adapter = HTTPAdapter(max_retries=retry_strategy)
        session.mount("https://", adapter)
        return session

    def _fetch_data(self, url: str, params: Dict[str, str] | None = None) -> List[Dict[str, Any]]:
        """GET *url* and return the decoded JSON body.

        Args:
            url: Endpoint URL.
            params: Optional query parameters; requests URL-encodes them.

        Returns:
            The decoded JSON payload, or an empty list if the request fails,
            times out, returns an HTTP error status, or the body is not JSON.
        """
        try:
            response = self._session.get(
                url,
                headers=self._headers,
                params=params,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on requests versions
            # whose JSONDecodeError is not a RequestException subclass.
            target = f"{url} with params {params}" if params else url
            logging.error(f"Failed to fetch data from {target}: {e}")
            return []

    def get_applications(self) -> List[Dict[str, Any]]:
        """Fetch the list of applications and log how many were found."""
        logging.info("Fetching list of applications...")
        apps = self._fetch_data(f"{self.API_BASE_URL}/applications")
        logging.info(f"Found {len(apps)} applications.")
        return apps

    def get_blocks(self, app_id: str) -> List[Dict[str, Any]]:
        """Fetch the blocks of the application identified by *app_id*."""
        return self._fetch_data(f"{self.API_BASE_URL}/blocks", {'applicationID': app_id})

    def get_views(self, app_id: str, block_id: str) -> List[Dict[str, Any]]:
        """Fetch the views of block *block_id* in application *app_id*."""
        return self._fetch_data(
            f"{self.API_BASE_URL}/views",
            {'applicationID': app_id, 'blockID': block_id},
        )

    def get_import_configs(self, app_id: str, block_id: str) -> List[Dict[str, Any]]:
        """Fetch the import configurations of block *block_id* in application *app_id*."""
        return self._fetch_data(
            f"{self.API_BASE_URL}/importConfigurations",
            {'applicationID': app_id, 'blockID': block_id},
        )


# --- 3. Core Logic ---

def fetch_block_details(client: PigmentMetadataClient, app: Dict[str, Any], block: Dict[str, Any]) -> Dict[str, Any]:
    """Collect the views and import configurations that belong to one block.

    Args:
        client: API client providing ``get_views`` and ``get_import_configs``.
        app: Application record; its ``'id'`` is used in both lookups.
        block: Block record; its ``'id'`` and ``'type'`` are read.

    Returns:
        A dict with the keys ``'app'``, ``'block'``, ``'views'`` and
        ``'imports'``. ``'imports'`` is an empty list for blocks whose type is
        ``'Table'``, because import configurations are not requested for them.
    """
    application_id, blk_id = app.get('id'), block.get('id')

    block_views = client.get_views(application_id, blk_id)

    is_table = block.get('type') == 'Table'
    block_imports = [] if is_table else client.get_import_configs(application_id, blk_id)

    return {
        'app': app,
        'block': block,
        'views': block_views,
        'imports': block_imports,
    }

def _application_columns(app: Dict[str, Any]) -> Dict[str, Any]:
    """Return the workspace and application columns shared by every row for *app*."""
    # `or {}` also covers an explicit null workspace, not only a missing key.
    workspace_info = app.get('workspace') or {}
    return {
        'workspace_id': workspace_info.get('id', ''),
        'workspace_name': workspace_info.get('name', ''),
        'application_id': app.get('id'),
        'application_name': app.get('name'),
    }


def generate_metadata_rows(client: PigmentMetadataClient, application_id: str | None = None) -> Generator[Dict[str, Any], None, None]:
    """Fetch and yield denormalized metadata rows, one dict per CSV line.

    Args:
        client: API client used for every lookup.
        application_id: When given (and truthy), only the application with
            this ID is processed; otherwise all applications are.

    Yields:
        Row dicts with workspace, application, block, view and import columns.
        Every selected application appears at least once: an application
        without blocks gets a single row with empty block/view/import columns,
        even when other selected applications do have blocks. Those
        placeholder rows come first; block rows follow in discovery order, so
        repeated runs over the same data produce the same row order.
        A block yields one row per (view, import) combination, with empty
        values standing in when it has no views or no imports.
    """
    apps = client.get_applications()
    if not apps:
        logging.warning("No applications found or failed to fetch.")
        return

    apps_to_process = [app for app in apps if app.get('id') == application_id] if application_id else apps
    if not apps_to_process:
        logging.warning(f"Application with ID '{application_id}' not found.")
        return

    logging.info("Discovering all blocks to be processed...")
    blocks_to_process = []   # (app, block) pairs, in discovery order
    apps_without_blocks = []
    for app in apps_to_process:
        blocks = client.get_blocks(app.get('id'))
        if blocks:
            blocks_to_process.extend((app, block) for block in blocks)
        else:
            apps_without_blocks.append(app)

    if not blocks_to_process:
        logging.warning("No blocks found in the selected application(s).")

    # Emit a placeholder row per block-less application so it is not silently
    # dropped from the report when other applications do contain blocks.
    for app in apps_without_blocks:
        yield {
            **_application_columns(app),
            'block_id': '', 'block_name': '', 'block_type': '',
            'view_id': '', 'view_name': '', 'view_type': '',
            'import_id': '', 'import_name': '',
        }

    if not blocks_to_process:
        return

    logging.info(f"Found {len(blocks_to_process)} total blocks. Fetching details concurrently...")

    # Fetch block details in parallel. Executor.map returns results in
    # submission order, which keeps the output order stable between runs.
    with ThreadPoolExecutor(max_workers=10) as executor:
        results = executor.map(lambda pair: fetch_block_details(client, *pair), blocks_to_process)

        for result in tqdm(results, total=len(blocks_to_process), desc="Processing Blocks"):
            app, block, views, imports = result['app'], result['block'], result['views'], result['imports']

            block_row = {
                **_application_columns(app),
                'block_id': block.get('id'), 'block_name': block.get('name'), 'block_type': block.get('type'),
            }

            # A single all-empty stand-in keeps the block visible in the
            # product below when it has no views or no imports.
            view_iter = views or [{'id': '', 'name': '', 'type': ''}]
            import_iter = imports or [{'id': '', 'name': ''}]

            for view, imp in itertools.product(view_iter, import_iter):
                yield {
                    **block_row,
                    'view_id': view.get('id', ''), 'view_name': view.get('name', ''), 'view_type': view.get('type', ''),
                    'import_id': imp.get('id', ''), 'import_name': imp.get('name', ''),
                }


def write_csv(data: List[Dict[str, Any]], output_file: str) -> None:
    """Dump *data* to *output_file* as CSV using a fixed column order.

    Keys that are not part of the fixed header are ignored; header columns
    missing from a row are left empty. Nothing is written when *data* is
    empty. An I/O failure is logged and not raised.
    """
    if not data:
        logging.warning("No data to write to CSV.")
        return

    column_order = [
        'workspace_id', 'workspace_name',
        'application_id', 'application_name',
        'block_id', 'block_name', 'block_type',
        'view_id', 'view_name', 'view_type',
        'import_id', 'import_name',
    ]

    try:
        with open(output_file, 'w', newline='', encoding='utf-8') as out:
            dict_writer = csv.DictWriter(out, fieldnames=column_order, extrasaction='ignore')
            dict_writer.writeheader()
            for record in data:
                dict_writer.writerow(record)
    except IOError as e:
        logging.error(f"Failed to write to file {output_file}: {e}")
    else:
        logging.info(f"Successfully wrote {len(data)} rows to {output_file}")

# --- 4. Main Execution Block ---
def _prompt_for_choice(applications):
    """Print the application menu and return the validated menu number.

    ``0`` means "all applications"; ``k > 0`` refers to ``applications[k - 1]``.
    The prompt repeats until an integer within range is entered.
    """
    print("\nPlease select an application to process:\n")
    print("[0] Process ALL applications")
    for position, application in enumerate(applications, start=1):
        print(f"[{position}] {application.get('name')} (ID: {application.get('id')})")

    while True:
        try:
            choice = int(input("\nEnter your choice: "))
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue
        if 0 <= choice <= len(applications):
            return choice
        print(f"Invalid choice. Please enter a number between 0 and {len(applications)}.")


def main():
    """Parse the command line, pick the application(s) and write the CSV report.

    The API key is read from the PIGMENT_METADATA_API_KEY environment
    variable; the process exits with status 1 when it is missing, when no
    applications can be fetched for the menu, or on an unexpected error, and
    with status 0 when interrupted by the user.
    """
    parser = argparse.ArgumentParser(
        description="Fetch a comprehensive metadata report from the Pigment API and export to a single CSV file.",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument(
        "-o", "--output-file",
        default="pigment_metadata_report.csv",
        help="Path for the output CSV file (default: pigment_metadata_report.csv).",
    )
    parser.add_argument(
        "-a", "--application-id",
        default=None,
        help="Optional: A specific Application ID to process, skipping the interactive prompt.",
    )
    args = parser.parse_args()

    api_key = os.environ.get('PIGMENT_METADATA_API_KEY')
    if not api_key:
        logging.error("PIGMENT_METADATA_API_KEY environment variable not set. This is required.")
        sys.exit(1)

    try:
        client = PigmentMetadataClient(api_key)
        target_app_id = args.application_id

        # No ID on the command line: let the user pick from a menu.
        if not target_app_id:
            applications = client.get_applications()
            if not applications:
                logging.error("Could not fetch any applications to process.")
                sys.exit(1)

            selection = _prompt_for_choice(applications)
            if selection > 0:
                target_app_id = applications[selection - 1]['id']

        rows = list(generate_metadata_rows(client, target_app_id))
        write_csv(rows, args.output_file)

    except KeyboardInterrupt:
        logging.warning("\nProcess interrupted by user. Exiting.")
        sys.exit(0)
    except Exception as e:
        logging.critical(f"A critical error stopped the process: {e}", exc_info=True)
        sys.exit(1)

# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

This script needs the following environment variable:

Environment

--- Linux ---

export PIGMENT_METADATA_API_KEY="Metadata API key"

--- Windows ---

set "PIGMENT_METADATA_API_KEY=Metadata API key"

--- PowerShell ---

$env:PIGMENT_METADATA_API_KEY = "Metadata API key"

Python requirements.txt

certifi==2025.8.3
charset-normalizer==3.4.3
idna==3.10
requests==2.32.5
tqdm==4.67.1
urllib3==2.5.0

To run this script:

python metadata_extract.py

Command-Line Arguments:

-o, --output-file: Specifies the path for the output CSV file. If omitted, it defaults to pigment_metadata_report.csv.
-a, --application-id: An optional argument to specify a single Application ID to process. This bypasses the interactive selection prompt.

FPA
Forum|Forum|5 months ago
September 23, 2025

Can you get the metric blocks' metadata, such as format, formula, dimensions, dependencies, and override formula info?

DamianAtPigment
Author
Employee
Forum|Forum|5 months ago
September 25, 2025

Can you get the metric blocks' metadata, such as format, formula, dimensions, dependencies, and override formula info?

Afraid not at this stage.

Ask your question here!

Not finding what you're looking for?

Sign up

Single Sign-On Need help?

Log into the Pigment Community

Single Sign-On Need help?

Scanning file for viruses.

This file cannot be downloaded