Pigment Metadata Export
This Python script connects to the Pigment Metadata API to fetch a full hierarchy of Workspaces, Applications, Blocks, Views, and Import Configurations. It then compiles this metadata into a single CSV file to clearly show the relationships between all entities.
This is particularly useful for Solution Architects and Modellers who need an overview of every object contained in a workspace or application.
For more information on the Pigment Metadata API, please read the related Pigment Community article.
Python metadata extraction script (metadata_extract.py)
#!/usr/bin/env python3
"""A command-line tool to fetch and export a comprehensive Pigment metadata report.
This script connects to the Pigment API to fetch a full hierarchy of Workspaces,
Applications, Blocks, Views, and Import Configurations. It then denormalizes and
compiles this metadata into a single, comprehensive CSV file to clearly show the
relationships between all entities.
"""
import os
import csv
import logging
import sys
import argparse
import itertools
from typing import Generator, Any, Dict, List
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
# --- 1. Configure Logging ---
class TqdmLoggingHandler(logging.Handler):
    """Logging handler that prints each record through ``tqdm.write()``.

    Sending log output through tqdm keeps messages from colliding with any
    progress bar that is being drawn at the same time.

    The constructor is inherited unchanged from ``logging.Handler``
    (``level`` defaults to ``logging.NOTSET``).
    """

    def emit(self, record):
        """Format *record* and hand the resulting text to ``tqdm.write()``."""
        try:
            rendered = self.format(record)
            tqdm.write(rendered)
            self.flush()
        except (KeyboardInterrupt, SystemExit):
            # Never swallow an interpreter shutdown or a Ctrl-C.
            raise
        except Exception:
            # Delegate to logging's standard error reporting for this record.
            self.handleError(record)
# Configure the root logger so that every message in this script is emitted
# through the tqdm-aware handler at INFO level and above.
log = logging.getLogger()
log.setLevel(logging.INFO)
# Start from a clean slate: detach whatever handlers were attached earlier.
while log.handlers:
    log.removeHandler(log.handlers[0])
formatter = logging.Formatter(
    '%(asctime)s - [%(levelname)s] - %(message)s',
    '%Y-%m-%d %H:%M:%S',
)
handler = TqdmLoggingHandler()
handler.setFormatter(formatter)
log.addHandler(handler)
# --- 2. API Client Class ---
class PigmentMetadataClient:
    """A client for interacting with the Pigment Metadata API.

    Every call is an HTTP GET sent through one shared ``requests.Session``
    carrying a bearer-token ``Authorization`` header. Request failures are
    logged and reported to the caller as an empty list rather than raised.
    """

    API_BASE_URL = 'https://pigment.app/api/v1'
    # Seconds to wait on the server before giving up. requests applies no
    # timeout by default, so a stalled connection would otherwise block the
    # whole export (and its worker threads) indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key: str):
        """Store the auth header and build the retrying session.

        Raises:
            ValueError: if *api_key* is empty or otherwise falsy.
        """
        if not api_key:
            raise ValueError("API key is required.")
        self._headers = {'Authorization': f"Bearer {api_key}"}
        self._session = self._create_retry_session()

    def _create_retry_session(self) -> requests.Session:
        """Return a session that retries failed HTTPS requests.

        Up to 3 retries are attempted for responses with status 429, 500,
        502, 503 or 504, using a backoff factor of 1. The adapter is mounted
        for ``https://`` URLs only.
        """
        session = requests.Session()
        retry_strategy = Retry(total=3, status_forcelist=[429, 500, 502, 503, 504], backoff_factor=1)
        adapter = HTTPAdapter(max_retries=retry_strategy)
        session.mount("https://", adapter)
        return session

    def _fetch_data(self, url: str, params: Dict[str, Any] | None = None) -> List[Dict[str, Any]]:
        """GET *url* and return the decoded JSON list.

        Args:
            url: Absolute endpoint URL.
            params: Optional query parameters. They are passed to requests so
                that values are URL-encoded instead of being spliced into the
                URL verbatim.

        Returns:
            The decoded JSON payload, or ``[]`` when the request fails, the
            body is not valid JSON, or the payload is not a list.
        """
        try:
            response = self._session.get(
                url,
                headers=self._headers,
                params=params,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            payload = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decoding errors on requests versions
            # whose JSONDecodeError is not a RequestException subclass.
            logging.error(f"Failed to fetch data from {url}: {e}")
            return []
        if not isinstance(payload, list):
            # Callers iterate the result as a list of dicts; anything else
            # would fail later with a far less helpful error.
            logging.error(f"Unexpected response from {url}: expected a JSON list, got {type(payload).__name__}")
            return []
        return payload

    def get_applications(self) -> List[Dict[str, Any]]:
        """Fetch the applications visible to the API key, logging the count."""
        logging.info("Fetching list of applications...")
        apps = self._fetch_data(f"{self.API_BASE_URL}/applications")
        logging.info(f"Found {len(apps)} applications.")
        return apps

    def get_blocks(self, app_id: str) -> List[Dict[str, Any]]:
        """Fetch the blocks of the application identified by *app_id*."""
        return self._fetch_data(f"{self.API_BASE_URL}/blocks", {'applicationID': app_id})

    def get_views(self, app_id: str, block_id: str) -> List[Dict[str, Any]]:
        """Fetch the views of block *block_id* in application *app_id*."""
        return self._fetch_data(
            f"{self.API_BASE_URL}/views",
            {'applicationID': app_id, 'blockID': block_id},
        )

    def get_import_configs(self, app_id: str, block_id: str) -> List[Dict[str, Any]]:
        """Fetch the import configurations of block *block_id* in application *app_id*."""
        return self._fetch_data(
            f"{self.API_BASE_URL}/importConfigurations",
            {'applicationID': app_id, 'blockID': block_id},
        )
# --- 3. Core Logic ---
def fetch_block_details(client: PigmentMetadataClient, app: Dict[str, Any], block: Dict[str, Any]) -> Dict[str, Any]:
    """Gather the views and import configurations that belong to one block.

    Views are requested for every block. Import configurations are requested
    only when the block's ``type`` is something other than ``'Table'``; for
    a ``'Table'`` block the import list stays empty.

    Args:
        client: API client used for both lookups.
        app: Application record that owns the block (its ``id`` is used).
        block: Block record to inspect (its ``id`` and ``type`` are used).

    Returns:
        A dict with the keys ``app``, ``block``, ``views`` and ``imports``.
    """
    application_id, block_key = app.get('id'), block.get('id')
    details = {
        'app': app,
        'block': block,
        'views': client.get_views(application_id, block_key),
        'imports': [],
    }
    is_table = block.get('type') == 'Table'
    if not is_table:
        details['imports'] = client.get_import_configs(application_id, block_key)
    return details
def _application_columns(app: Dict[str, Any]) -> Dict[str, Any]:
    """Return the workspace/application columns shared by every row of *app*."""
    # `or {}` also covers a workspace key that is present but null.
    workspace_info = app.get('workspace') or {}
    return {
        'workspace_id': workspace_info.get('id', ''),
        'workspace_name': workspace_info.get('name', ''),
        'application_id': app.get('id'),
        'application_name': app.get('name'),
    }


def _application_only_row(app: Dict[str, Any]) -> Dict[str, Any]:
    """Return a row for *app* with every block, view and import column blank."""
    return {
        **_application_columns(app),
        'block_id': '', 'block_name': '', 'block_type': '',
        'view_id': '', 'view_name': '', 'view_type': '',
        'import_id': '', 'import_name': '',
    }


def generate_metadata_rows(client: PigmentMetadataClient, application_id: str | None = None) -> Generator[Dict[str, Any], None, None]:
    """Fetch and yield denormalized metadata rows, one dict per CSV line.

    Blocks are discovered sequentially per application; the views and import
    configurations of each block are then fetched concurrently on a pool of
    10 worker threads. Block rows are yielded in completion order, so their
    order can differ between runs.

    Args:
        client: API client used for every lookup.
        application_id: When given, only the application with this ID is
            processed; otherwise every application is processed.

    Yields:
        Dicts holding the workspace, application, block, view and import
        columns. An application for which no blocks were returned yields a
        single application-only row, whether or not other selected
        applications have blocks. A block yields one row per (view, import)
        combination, with blank columns standing in for a missing side.

    Raises:
        Any exception raised inside ``fetch_block_details`` is re-raised here
        when the corresponding future's result is read.
    """
    apps = client.get_applications()
    if not apps:
        logging.warning("No applications found or failed to fetch.")
        return
    apps_to_process = [app for app in apps if app.get('id') == application_id] if application_id else apps
    if not apps_to_process:
        logging.warning(f"Application with ID '{application_id}' not found.")
        return

    logging.info("Discovering all blocks to be processed...")
    blocks_to_process = []
    apps_without_blocks = []
    for app in apps_to_process:
        blocks = client.get_blocks(app.get('id'))
        if blocks:
            blocks_to_process.extend((app, block) for block in blocks)
        else:
            apps_without_blocks.append(app)

    if not blocks_to_process:
        logging.warning("No blocks found in the selected application(s).")
    elif apps_without_blocks:
        logging.warning(f"{len(apps_without_blocks)} application(s) returned no blocks; exporting them as application-only rows.")

    # Keep block-less applications in the report instead of dropping them
    # whenever at least one other application does have blocks.
    for app in apps_without_blocks:
        yield _application_only_row(app)
    if not blocks_to_process:
        return

    logging.info(f"Found {len(blocks_to_process)} total blocks. Fetching details concurrently...")
    # Use a ThreadPoolExecutor to fetch block details in parallel.
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(fetch_block_details, client, app, block)
            for app, block in blocks_to_process
        ]
        # Process results as they complete, updating the progress bar.
        for future in tqdm(as_completed(futures), total=len(futures), desc="Processing Blocks"):
            result = future.result()
            app, block, views, imports = result['app'], result['block'], result['views'], result['imports']
            block_row = {
                **_application_columns(app),
                'block_id': block.get('id'), 'block_name': block.get('name'), 'block_type': block.get('type'),
            }
            # A single blank placeholder keeps the product non-empty when a
            # block has no views and/or no imports.
            view_iter = views or [{'id': '', 'name': '', 'type': ''}]
            import_iter = imports or [{'id': '', 'name': ''}]
            for view, imp in itertools.product(view_iter, import_iter):
                yield {
                    **block_row,
                    'view_id': view.get('id', ''), 'view_name': view.get('name', ''), 'view_type': view.get('type', ''),
                    'import_id': imp.get('id', ''), 'import_name': imp.get('name', ''),
                }
def write_csv(data: List[Dict[str, Any]], output_file: str) -> None:
    """Write the given rows to *output_file* as CSV with a fixed column order.

    Nothing is written (a warning is logged instead) when *data* is empty.
    Keys that are not among the known columns are ignored, and an I/O failure
    is logged rather than raised.

    Args:
        data: Row dicts keyed by column name.
        output_file: Destination path; an existing file is overwritten.
    """
    if not data:
        logging.warning("No data to write to CSV.")
        return

    columns = [
        'workspace_id', 'workspace_name',
        'application_id', 'application_name',
        'block_id', 'block_name', 'block_type',
        'view_id', 'view_name', 'view_type',
        'import_id', 'import_name',
    ]
    try:
        # newline='' lets the csv module control line endings itself.
        with open(output_file, 'w', newline='', encoding='utf-8') as out:
            sink = csv.DictWriter(out, fieldnames=columns, extrasaction='ignore')
            sink.writeheader()
            sink.writerows(data)
        logging.info(f"Successfully wrote {len(data)} rows to {output_file}")
    except IOError as e:
        logging.error(f"Failed to write to file {output_file}: {e}")
# --- 4. Main Execution Block ---
def main():
    """Command-line entry point: pick an application and export its metadata.

    Steps:
      1. Parse ``--output-file`` and ``--application-id``.
      2. Read the API key from ``PIGMENT_METADATA_API_KEY``; exit with
         status 1 when it is not set.
      3. If no application ID was given, list the applications and prompt
         until a valid menu number is entered (0 means all applications).
      4. Collect every metadata row and write them to the output CSV.

    Ctrl-C is logged and exits with status 0; any other exception is logged
    with its traceback and exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Fetch a comprehensive metadata report from the Pigment API and export to a single CSV file.",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument(
        "-o", "--output-file",
        default="pigment_metadata_report.csv",
        help="Path for the output CSV file (default: pigment_metadata_report.csv).",
    )
    parser.add_argument(
        "-a", "--application-id",
        default=None,
        help="Optional: A specific Application ID to process, skipping the interactive prompt.",
    )
    args = parser.parse_args()

    api_key = os.getenv('PIGMENT_METADATA_API_KEY')
    if not api_key:
        logging.error("PIGMENT_METADATA_API_KEY environment variable not set. This is required.")
        sys.exit(1)

    try:
        client = PigmentMetadataClient(api_key)
        selected_app_id = args.application_id

        if not selected_app_id:
            applications = client.get_applications()
            if not applications:
                logging.error("Could not fetch any applications to process.")
                sys.exit(1)

            # Menu: entry 0 selects everything, entries 1..N map to applications.
            print("\nPlease select an application to process:\n")
            print("[0] Process ALL applications")
            for number, app in enumerate(applications, start=1):
                print(f"[{number}] {app.get('name')} (ID: {app.get('id')})")

            highest = len(applications)
            choice = None
            while choice is None:
                try:
                    entered = int(input("\nEnter your choice: "))
                except ValueError:
                    print("Invalid input. Please enter a number.")
                    continue
                if 0 <= entered <= highest:
                    choice = entered
                else:
                    print(f"Invalid choice. Please enter a number between 0 and {highest}.")

            # Choice 0 leaves the ID unset, which means "process all".
            if choice > 0:
                selected_app_id = applications[choice - 1]['id']

        rows = list(generate_metadata_rows(client, selected_app_id))
        write_csv(rows, args.output_file)
    except KeyboardInterrupt:
        logging.warning("\nProcess interrupted by user. Exiting.")
        sys.exit(0)
    except Exception as e:
        logging.critical(f"A critical error stopped the process: {e}", exc_info=True)
        sys.exit(1)
if __name__ == "__main__":
    main()

This script needs the following environment variable:
Environment
--- Linux ---
export PIGMENT_METADATA_API_KEY="Metadata API key"
--- Windows ---
REM Quote the whole assignment: cmd.exe stores quotes placed after "=" as part of the value.
set "PIGMENT_METADATA_API_KEY=Metadata API key"
--- Powershell ---
$env:PIGMENT_METADATA_API_KEY = "Metadata API key"

Python requirements.txt
certifi==2025.8.3
charset-normalizer==3.4.3
idna==3.10
requests==2.32.5
tqdm==4.67.1
urllib3==2.5.0

To run this script:
python metadata_extract.py

Command-Line Arguments:
- -o, --output-file: Specifies the path for the output CSV file. If omitted, it defaults to pigment_metadata_report.csv.
- -a, --application-id: An optional argument to specify a single Application ID to process. This bypasses the interactive selection prompt.


