Fetching Data

List, iterate, retrieve, and download collection items

This tutorial covers fetching data from collections through listing, iterating, retrieving, and downloading using list_*, iter_*, get_item(), and download() methods.


Introduction

The API provides comprehensive ways to work with collection items:

  • Listing and Iterating: Use list_* and iter_* methods to explore collections
  • Getting Specific Items: Use get_item() to retrieve specific files and folders
  • Downloading Items: Use download() to download files and folders to your local filesystem

Listing and Iterating

The API provides two ways to access multiple resources:

  • list_* methods: Return all items at once (with pagination support)
  • iter_* methods: Yield items one by one with transparent pagination

Both are equally supported and serve different use cases.


list_* Methods

Return all items at once, with pagination information:

iter_* Methods

Yield items one by one with transparent pagination:

Projects

Listing Projects

Object-Oriented Style:

python
import asyncio
from miura.aio import AsyncNexus
from miura.logging import get_logger

logger = get_logger(__name__)

async def list_projects():
    async with AsyncNexus() as nexus:
        # List all projects
        projects = await nexus.list_projects()
        logger.info(f"Found {len(projects)} project(s)")
        for project in projects:
            logger.info(f"  - {project.name} ({project.uuid})")

asyncio.run(list_projects())

Path-Based Style:

python
from miura import Nexus
from miura.logging import get_logger

logger = get_logger(__name__)

with Nexus() as nexus:
    projects = nexus.list_projects()
    logger.info(f"Found {len(projects)} project(s)")
    for project in projects:
        logger.info(f"  - {project.name} ({project.uuid})")
# Nexus automatically closes when exiting the with block

Iterating Projects

Object-Oriented Style:

python
async def iterate_projects():
    async with AsyncNexus() as nexus:
        # Iterate through all projects
        async for project in nexus.iter_projects():
            logger.info(f"Processing project: {project.name}")
            # Process each project

asyncio.run(iterate_projects())

Path-Based Style:

python
with Nexus() as nexus:
    for project in nexus.iter_projects():
        logger.info(f"Processing project: {project.name}")
# Nexus automatically closes when exiting the with block

Pagination with list_projects()

python
# list_projects() returns all projects at once
# For large numbers of projects, use iter_projects() instead
projects = await nexus.list_projects()
logger.info(f"Total projects: {len(projects)}")


Listing Collections

Object-Oriented Style:

python
async def list_collections():
    async with AsyncNexus() as nexus:
        project = await nexus.get_project("my-project")
        # List all collections
        collections = await project.list_collections()
        logger.info(f"Found {len(collections)} collection(s)")
        for collection_info in collections:
            logger.info(f"  - {collection_info.name} ({collection_info.uuid})")

asyncio.run(list_collections())

Path-Based Style:

python
with Nexus() as nexus:
    project = nexus.get("my-project")
    collections = project.list_collections()
    logger.info(f"Found {len(collections)} collection(s)")
    for collection_info in collections:
        logger.info(f"  - {collection_info.name} ({collection_info.uuid})")
# Nexus automatically closes when exiting the with block

Iterating Collections

Object-Oriented Style:

python
async def iterate_collections():
    async with AsyncNexus() as nexus:
        project = await nexus.get_project("my-project")
        # Iterate through all collections
        async for collection_info in project.iter_collections():
            logger.info(f"Processing collection: {collection_info.name}")
            # Get full collection object if needed
            collection = await project.get_collection(collection_info.name)
            # Process collection

asyncio.run(iterate_collections())

Path-Based Style:

python
with Nexus() as nexus:
    project = nexus.get("my-project")
    for collection_info in project.iter_collections():
        logger.info(f"Processing collection: {collection_info.name}")
# Nexus automatically closes when exiting the with block

Items

Listing Items

Object-Oriented Style:

python
async def list_items():
    async with AsyncNexus() as nexus:
        project = await nexus.get_project("my-project")
        collection = await project.get_collection("my-collection")
        # List items with pagination
        response = await collection.list_items(
            path="/",
            page=1,
            page_size=50
        )
        items = response.get("items", [])
        pagination = response.get("pagination", {})

        logger.info(f"Found {pagination.get('total_count', 0)} item(s)")
        logger.info(f"Showing page {pagination.get('page', 1)} of {pagination.get('total_pages', 1)}")

        for item in items:
            logger.info(f"  - {item.name} ({'folder' if item.is_folder else 'file'})")

asyncio.run(list_items())

Path-Based Style:

python
with Nexus() as nexus:
    collection = nexus.get("my-project/my-collection")
    response = collection.list_items(path="/", page=1, page_size=50)
    items = response.get("items", [])
    
    for item in items:
        logger.info(f"  - {item.name}")
# Nexus automatically closes when exiting the with block

Iterating Items

Object-Oriented Style:

python
async def iterate_items():
    async with AsyncNexus() as nexus:
        project = await nexus.get_project("my-project")
        collection = await project.get_collection("my-collection")
        # Iterate through all items (transparent pagination)
        async for item in collection.iter_items(path="/", page_size=50):
            logger.info(f"Processing item: {item.name}")
            # Process each item

asyncio.run(iterate_items())

Path-Based Style:

python
with Nexus() as nexus:
    collection = nexus.get("my-project/my-collection")
    for item in collection.iter_items(path="/", page_size=50):
        logger.info(f"Processing item: {item.name}")
# Nexus automatically closes when exiting the with block

Pagination with list_items()

python
# Get paginated results
response = await collection.list_items(path="/", page=1, page_size=10)
items = response.get("items", [])
pagination = response.get("pagination", {})

# Get next page
has_next = pagination.get("page", 1) < pagination.get("total_pages", 1)
if has_next:
    next_page = await collection.list_items(path="/", page=2, page_size=10)


1. Use list_* for Small Datasets

When you know the dataset is small, use list_*:

2. Use iter_* for Large Datasets

For large datasets, use iter_* to avoid loading everything into memory:

3. Configure Page Size

Adjust page size based on your needs:

python
# Large page size: Fewer requests, more memory
async for item in collection.iter_items(page_size=1000):
    process_item(item)

4. Use Prefetching for Better Performance

Prefetch pages for smoother iteration:

Complete Examples

Example 1: List All Resources

python
import asyncio
from miura.aio import AsyncNexus
from miura.logging import get_logger

logger = get_logger(__name__)

async def list_all_resources():
    """List all projects, collections, and items."""
    async with AsyncNexus() as nexus:
        # List projects
        logger.info("=== Projects ===")
        projects = await nexus.list_projects()
        logger.info(f"Found {len(projects)} project(s)")

        for project in projects:
            logger.info(f"\nProject: {project.name}")

            # List collections in project
            collections = await project.list_collections()
            logger.info(f"  Collections: {len(collections)}")

            for collection_info in collections:
                logger.info(f"    Collection: {collection_info.name}")

                # List items in collection
                collection = await project.get_collection(collection_info.name)
                response = await collection.list_items(path="/", page=1, page_size=10)
                items = response.get("items", [])
                logger.info(f"      Items: {len(items)} (showing first 10)")

asyncio.run(list_all_resources())

Example 2: Iterate Through All Resources

python
async def iterate_all_resources():
    """Iterate through all projects, collections, and items."""
    async with AsyncNexus() as nexus:
        # Iterate projects
        async for project_info in nexus.iter_projects():
            logger.info(f"Processing project: {project_info.name}")

            project = await nexus.get_project(project_info.name)

            # Iterate collections
            async for collection_info in project.iter_collections():
                logger.info(f"  Processing collection: {collection_info.name}")

                collection = await project.get_collection(collection_info.name)
                # Iterate items
                async for item in collection.iter_items(path="/"):
                    logger.info(f"    Processing item: {item.name}")

asyncio.run(iterate_all_resources())

Example 3: Selective Processing

python
async def selective_processing():
    """Process only specific items."""
    async with AsyncNexus() as nexus:
        project = await nexus.get_project("my-project")
        collection = await project.get_collection("my-collection")
        # Iterate and filter
        file_count = 0
        async for item in collection.iter_items(path="/"):
            # Only process files (not folders)
            if not item.is_folder:
                # Only process large files (> 1MB)
                if item.file_size and item.file_size > 1_000_000:
                    logger.info(f"Found large file: {item.name} ({item.file_size:,} bytes)")
                    file_count += 1

                    # Process the file
                    bound_item = await collection.get_item(item.item_uri)
                    if bound_item:
                        result = await bound_item.download("./downloads/", confirm=False)
                        logger.info(f"  Downloaded: {result.get('files_downloaded', 0)} file(s)")

        logger.info(f"Processed {file_count} large files")

asyncio.run(selective_processing())


Next Steps



Getting Individual Items

Async API

Using the Async API (miura.aio):

python
import asyncio
from miura.aio import AsyncNexus
from miura.logging import get_logger

logger = get_logger(__name__)

async def main():
    async with AsyncNexus() as nexus:
        # Get a project and collection
        project = await nexus.get_project("my-project")
        collection = await project.get_collection("my-collection")

        # Get a specific file
        item = await collection.get_item("/data/parameters.dat")
        if item:
            logger.info(f"Found file: {item.name}")
            logger.info(f"  URI: {item.item_uri}")
            logger.info(f"  Size: {item.file_size:,} bytes")
        else:
            logger.warning("File not found")

asyncio.run(main())

Sync API

Using the Sync API (miura):

python
from miura import Nexus
from miura.logging import get_logger

logger = get_logger(__name__)

with Nexus() as nexus:
    # Get a project and collection
    project = nexus.get_project("my-project")
    collection = project.get_collection("my-collection")

    # Get a specific file
    item = collection.get_item("/data/parameters.dat")
    if item:
        logger.info(f"Found file: {item.name}")
        logger.info(f"  URI: {item.item_uri}")
        logger.info(f"  Size: {item.file_size:,} bytes")
    else:
        logger.warning("File not found")
# Nexus automatically closes when exiting the with block

Getting Folders

Folders are retrieved the same way, but with a trailing slash:

Async API:

python
# Get a folder (async)
folder = await collection.get_item("/models/")
if folder:
    logger.info(f"Found folder: {folder.name}")
    logger.info(f"  URI: {folder.item_uri}")
    logger.info(f"  Has children: {folder.has_children}")

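Sync API:

python
# Get a folder (sync)
folder = collection.get_item("/models/")
if folder:
    logger.info(f"Found folder: {folder.name}")
    logger.info(f"  URI: {folder.item_uri}")
    logger.info(f"  Has children: {folder.has_children}")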

Nested Paths

Get items in nested folders:

Async API:

python
# Get a nested folder
folder = await collection.get_item("/01/h5/")
if folder:
    logger.info(f"Found nested folder: {folder.item_uri}")

Sync API:

python
# Get a nested folder
folder = collection.get_item("/01/h5/")
if folder:
    logger.info(f"Found nested folder: {folder.item_uri}")


Using Hierarchical Paths

Note: Hierarchical path navigation (nexus.get()) is available in the Sync API (miura). This is a convenient way to navigate directly to resources using paths.

Sync API (Path-Based Navigation):

python
from miura import Nexus
from miura.logging import get_logger

logger = get_logger(__name__)

with Nexus() as nexus:
    # Get a file using hierarchical path
    # Format: "project-name/collection-name/item-path"
    item = nexus.get("my-project/my-collection/data/file.txt")
    logger.info(f"Found item: {item.name}")
    logger.info(f"  URI: {item.item_uri}")

    # Get a folder using hierarchical path
    folder = nexus.get("my-project/my-collection/models/")
    logger.info(f"Found folder: {folder.name}")

    # Get a collection (just project/collection)
    collection = nexus.get("my-project/my-collection")
    logger.info(f"Collection: {collection.name}")

    # Get a project (just project name)
    project = nexus.get("my-project")
    logger.info(f"Project: {project.name}")
# Nexus automatically closes when exiting the with block

Note: The Async API uses object-oriented navigation. To get an item in the async API, use:

python
# Async API equivalent (object-oriented)
async with AsyncNexus() as nexus:
    project = await nexus.get_project("my-project")
    collection = await project.get_collection("my-collection")
    item = await collection.get_item("/data/file.txt")

Benefits of Hierarchical Paths

  • Convenience: Navigate directly to items without intermediate steps
  • Readability: Clear, path-like syntax
  • Flexibility: Works for projects, collections, and items

Accessing Item Properties

Once you have an item, you can access its properties:

python
item = await collection.get_item("/data/file.txt")
if item:
    # Basic properties
    logger.info(f"Name: {item.name}")
    logger.info(f"URI: {item.item_uri}")
    logger.info(f"UUID: {item.uuid}")

    # File-specific properties
    logger.info(f"File size: {item.file_size:,} bytes")
    logger.info(f"Is folder: {item.is_folder}")
    logger.info(f"Has children: {item.has_children}")
    logger.info(f"Status: {item.status}")

    # Metadata
    if item.metadata:
        logger.info(f"Metadata: {item.metadata}")

Property Reference

| Property     | Type | Description                        |
|--------------|------|------------------------------------|
| name         | str  | Item name (last part of path)      |
| item_uri     | str  | Full path to the item              |
| uuid         | str  | Unique identifier                  |
| file_size    | int  | File size in bytes (0 for folders) |
| is_folder    | bool | Whether this is a folder           |
| has_children | bool | Whether folder has children        |
| status       | str  | Item status (e.g., "SUCCEEDED")    |
| metadata     | dict | Additional metadata                |

Handling Missing Items

When an item doesn't exist, get_item() returns None:

python
item = await collection.get_item("/nonexistent/file.txt")
if item is None:
    logger.warning("Item not found")
    # Handle the missing item case
else:
    # Work with the item
    logger.info(f"Found: {item.name}")

Error Handling with Hierarchical Paths

When using nexus.get() with hierarchical paths, a ValueError is raised if the item is not found:

python
from miura import Nexus
from miura.logging import get_logger

logger = get_logger(__name__)

with Nexus() as nexus:
    try:
        item = nexus.get("my-project/my-collection/nonexistent/file.txt")
    except ValueError as e:
        logger.warning(f"Item not found: {e}")
    # Handle the error
# Nexus automatically closes when exiting the with block

Complete Example

Here's a complete example that demonstrates getting items in various ways:

python
import asyncio
from miura.aio import AsyncNexus
from miura.logging import get_logger

logger = get_logger(__name__)

async def main():
    async with AsyncNexus() as nexus:
        # Get project and collection
        project = await nexus.get_project("my-project")
        collection = await project.get_collection("my-collection")

        # Example 1: Get a root-level file
        logger.info("=== Example 1: Root-level file ===")
        item = await collection.get_item("/parameters.dat")
        if item:
            logger.info(f"Found: {item.name} ({item.file_size:,} bytes)")

        # Example 2: Get a folder
        logger.info("=== Example 2: Folder ===")
        folder = await collection.get_item("/models/")
        if folder:
            logger.info(f"Found folder: {folder.name}")
            logger.info(f"  Has children: {folder.has_children}")

        # Example 3: Get a nested file
        logger.info("=== Example 3: Nested file ===")
        nested_item = await collection.get_item("/01/h5/model.h5")
        if nested_item:
            logger.info(f"Found nested file: {nested_item.item_uri}")

        # Example 4: Get a nested folder
        logger.info("=== Example 4: Nested folder ===")
        nested_folder = await collection.get_item("/01/h5/")
        if nested_folder:
            logger.info(f"Found nested folder: {nested_folder.item_uri}")

        # Example 5: Handle missing item
        logger.info("=== Example 5: Missing item ===")
        missing = await collection.get_item("/nonexistent/file.txt")
        if missing is None:
            logger.warning("Item not found (as expected)")

if __name__ == "__main__":
    asyncio.run(main())


Next Steps



Downloading Items

The BoundCollectionItem class provides a convenient download() method that allows you to download files and folders directly from collection items. This is especially useful when you've retrieved a specific item using get_item().

Key Concepts

  • BoundCollectionItem: A wrapper around CollectionItem that includes a reference to its parent collection, enabling direct download operations
  • Automatic Path Handling: The download method automatically handles file vs folder downloads
  • Progress Tracking: Download operations provide status and progress information

Async API

Using the Async API (miura.aio):

python
import asyncio
from miura.aio import AsyncNexus
from miura.logging import get_logger

logger = get_logger(__name__)

async def main():
    async with AsyncNexus() as nexus:
        project = await nexus.get_project("my-project")
        collection = await project.get_collection("my-collection")
        # Get a specific file
        item = await collection.get_item("/data/parameters.dat")
        if item:
            # Download the file
            result = await item.download("./downloads/", confirm=False)
            logger.info(f"Download status: {result['status']}")
            logger.info(f"Files downloaded: {result.get('files_downloaded', 0)}")
            logger.info(f"Total size: {result.get('downloaded_size', 0):,} bytes")

asyncio.run(main())

Sync API

Using the Sync API (miura):

python
from miura import Nexus
from miura.logging import get_logger

logger = get_logger(__name__)

with Nexus() as nexus:
    project = nexus.get_project("my-project")
    collection = project.get_collection("my-collection")
    # Get a specific file
    item = collection.get_item("/data/parameters.dat")
    if item:
        # Download the file
        result = item.download("./downloads/", confirm=False)
        logger.info(f"Download status: {result['status']}")
        logger.info(f"Files downloaded: {result.get('files_downloaded', 0)}")
        logger.info(f"Total size: {result.get('downloaded_size', 0):,} bytes")
# Nexus automatically closes when exiting the with block

Download to Specific Location

Async API:

python
# Download to a specific directory (async)
item = await collection.get_item("/data/file.txt")
if item:
    result = await item.download("./my-custom-path/", confirm=False)
    logger.info(f"Downloaded to: {result.get('destination', 'unknown')}")

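Sync API:

python
# Download to a specific directory (sync)
item = collection.get_item("/data/file.txt")
if item:
    result = item.download("./my-custom-path/", confirm=False)
    logger.info(f"Downloaded to: {result.get('destination', 'unknown')}")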

Download with Confirmation

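Async API:

python
# Download with confirmation prompt (async)
item = await collection.get_item("/large-file.dat")
if item:
    result = await item.download("./downloads/", confirm=True)
    # User will be prompted before download starts

Sync API:

python
# Download with confirmation prompt (sync)
item = collection.get_item("/large-file.dat")
if item:
    result = item.download("./downloads/", confirm=True)
    # User will be prompted before download starts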


Downloading Folders

Download entire folders and their contents:

Nested Folder Downloads

Download nested folder structures:

Using Hierarchical Paths

Note: Hierarchical path navigation (nexus.get()) is available in the Sync API (miura).

Sync API (Path-Based Navigation):

python
from miura import Nexus
from miura.logging import get_logger

logger = get_logger(__name__)

with Nexus() as nexus:
    # Get and download a file using hierarchical path
    item = nexus.get("my-project/my-collection/data/file.txt")
    result = item.download("./downloads/", confirm=False)
    logger.info(f"Downloaded: {result.get('files_downloaded', 0)} file(s)")

    # Get and download a folder
    folder = nexus.get("my-project/my-collection/models/")
    result = folder.download("./downloads/models/", confirm=False)
    logger.info(f"Downloaded: {result.get('files_downloaded', 0)} file(s)")
# Nexus automatically closes when exiting the with block

Note: In the Async API, use object-oriented navigation:

python
# Async API equivalent (object-oriented)
async with AsyncNexus() as nexus:
    project = await nexus.get_project("my-project")
    collection = await project.get_collection("my-collection")
    item = await collection.get_item("/data/file.txt")
    if item:
        result = await item.download("./downloads/", confirm=False)


Browse and Download Selectively

List items and download only specific ones:

python
response = await collection.list_items(path="/", page=1, page_size=50)
items = response.get("items", [])

# Download only files (not folders)
for item in items:
    if not item.is_folder:
        # Get as bound item to enable download
        bound_item = await collection.get_item(item.item_uri)
        if bound_item:
            logger.info(f"Downloading {item.name}...")
            result = await bound_item.download("./downloads/selective/", confirm=False)
            logger.info(f"  Downloaded: {result.get('files_downloaded', 0)} file(s)")

Iterate and Download

Use iter_items() to process large collections:

python
download_count = 0
async for item in collection.iter_items(path="/", page_size=50):
    # Only download files, limit to first 10 for demo
    if not item.is_folder and download_count < 10:
        bound_item = await collection.get_item(item.item_uri)
        if bound_item:
            logger.info(f"Downloading {item.name} ({item.file_size:,} bytes)...")
            result = await bound_item.download("./downloads/iterated/", confirm=False)
            download_count += 1
            logger.info(f"  Downloaded: {result.get('files_downloaded', 0)} file(s)")

Filter by Criteria

Download items based on specific criteria:

python
async for item in collection.iter_items(path="/"):
    # Download only large files (> 1MB)
    if not item.is_folder and item.file_size and item.file_size > 1_000_000:
        bound_item = await collection.get_item(item.item_uri)
        if bound_item:
            logger.info(f"Downloading large file: {item.name}")
            result = await bound_item.download("./downloads/large-files/", confirm=False)

Complete Example

Here's a complete example demonstrating various download scenarios:

python
import asyncio
from datetime import datetime
from miura.aio import AsyncNexus
from miura.logging import get_logger

logger = get_logger(__name__)

async def main():
    async with AsyncNexus() as nexus:
        project = await nexus.get_project("my-project")
        collection = await project.get_collection("my-collection")
        # Example 1: Download a specific file
        logger.info("=== Example 1: Download specific file ===")
        item = await collection.get_item("/parameters.dat")
        if item:
            result = await item.download("./downloads/", confirm=False)
            logger.info(f"Downloaded: {result.get('files_downloaded', 0)} file(s)")

        # Example 2: Download a folder
        logger.info("=== Example 2: Download folder ===")
        folder = await collection.get_item("/models/")
        if folder:
            result = await folder.download("./downloads/models/", confirm=False)
            logger.info(f"Downloaded: {result.get('files_downloaded', 0)} file(s)")

        # Example 3: Browse and download selectively
        logger.info("=== Example 3: Selective download ===")
        response = await collection.list_items(path="/", page=1, page_size=10)
        items = response.get("items", [])

        for item in items[:3]:  # Download first 3 items
            if not item.is_folder:
                bound_item = await collection.get_item(item.item_uri)
                if bound_item:
                    logger.info(f"Downloading {item.name}...")
                    result = await bound_item.download("./downloads/selective/", confirm=False)
                    logger.info(f"  Status: {result['status']}")

        # Example 4: Iterate and download
        logger.info("=== Example 4: Iterate and download ===")
        download_count = 0
        async for item in collection.iter_items(path="/"):
            if not item.is_folder and download_count < 2:
                bound_item = await collection.get_item(item.item_uri)
                if bound_item:
                    logger.info(f"Downloading {item.name}...")
                    result = await bound_item.download("./downloads/iterated/", confirm=False)
                    download_count += 1
                    if download_count >= 2:
                        break

if __name__ == "__main__":
    asyncio.run(main())


Download Result Structure

The download() method returns a dictionary with the following structure:

python
{
    "status": "completed",  # "completed", "failed", "partial"
    "files_downloaded": 5,  # Number of files successfully downloaded
    "files_failed": 0,  # Number of files that failed to download
    "total_files": 5,  # Total number of files processed
    "downloaded_size": 1024000,  # Total size downloaded in bytes
    "destination": "./downloads/",  # Destination directory
    "errors": []  # List of error messages (if any)
}

Handling Download Results

python
result = item.download("./downloads/", confirm=False)

if result["status"] == "completed":
    logger.info("Download completed successfully")
    logger.info(f"Downloaded {result['files_downloaded']} file(s)")
elif result["status"] == "partial":
    logger.warning("Download partially completed")
    logger.warning(f"Failed: {result['files_failed']} file(s)")
    if result.get("errors"):
        for error in result["errors"]:
            logger.error(f"Error: {error}")
elif result["status"] == "failed":
    logger.error("Download failed")
    if result.get("errors"):
        for error in result["errors"]:
            logger.error(f"Error: {error}")

Best Practices

1. Check Item Existence

Always check if an item exists before downloading:

python
item = await collection.get_item("/data/file.txt")
if item:
    result = await item.download("./downloads/", confirm=False)
else:
    logger.warning("Item not found, skipping download")

2. Use Appropriate Paths

Use trailing slashes for folders, no trailing slash for files:

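python
# File (no trailing slash)
item = await collection.get_item("/data/file.txt")

# Folder (trailing slash)
folder = await collection.get_item("/models/")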

3. Handle Large Downloads

For large downloads, use confirm=True or provide user feedback:

python
item = await collection.get_item("/large-file.dat")
if item and item.file_size:
    size_mb = item.file_size / (1024 * 1024)
    logger.info(f"Preparing to download {size_mb:.2f} MB file")
    result = await item.download("./downloads/", confirm=True)

4. Organize Downloads

Use descriptive paths to organize downloaded files:

python
from datetime import datetime

# Organize by date
date_str = datetime.now().strftime("%Y%m%d")
result = item.download(f"./downloads/{date_str}/", confirm=False)

# Organize by item type
if item.is_folder:
    result = item.download("./downloads/folders/", confirm=False)
else:
    result = item.download("./downloads/files/", confirm=False)


Next Steps


© 2025