Fetching Data
This tutorial covers fetching data from collections through listing, iterating, retrieving, and downloading using list_*, iter_*, get_item(), and download() methods.
Introduction
The API provides comprehensive ways to work with collection items:
- Listing and Iterating: Use `list_*` and `iter_*` methods to explore collections
- Getting Specific Items: Use `get_item()` to retrieve specific files and folders
- Downloading Items: Use `download()` to download files and folders to your local filesystem
Listing and Iterating
The API provides two ways to access multiple resources:
- `list_*` methods: Return all items at once (with pagination support)
- `iter_*` methods: Yield items one by one with transparent pagination
Both are equally supported and serve different use cases.
list_* Methods
Return all items at once, with pagination information:
iter_* Methods
Yield items one by one with transparent pagination:
Projects
Listing Projects
Object-Oriented Style:
import asyncio
from miura.aio import AsyncNexus
from miura.logging import get_logger
logger = get_logger(__name__)
async def list_projects():
async with AsyncNexus() as nexus:
# List all projects
projects = await nexus.list_projects()
logger.info(f"Found {len(projects)} project(s)")
for project in projects:
logger.info(f" - {project.name} ({project.uuid})")
asyncio.run(list_projects())
Path-Based Style:
from miura import Nexus
from miura.logging import get_logger
logger = get_logger(__name__)
with Nexus() as nexus:
projects = nexus.list_projects()
logger.info(f"Found {len(projects)} project(s)")
for project in projects:
logger.info(f" - {project.name} ({project.uuid})")
# Nexus automatically closes when exiting the with block
Iterating Projects
Object-Oriented Style:
async def iterate_projects():
async with AsyncNexus() as nexus:
# Iterate through all projects
async for project in nexus.iter_projects():
logger.info(f"Processing project: {project.name}")
# Process each project
asyncio.run(iterate_projects())
Path-Based Style:
with Nexus() as nexus:
for project in nexus.iter_projects():
logger.info(f"Processing project: {project.name}")
# Nexus automatically closes when exiting the with block
Pagination with list_projects()
list_projects() returns all projects at once
For large numbers of projects, use iter_projects() instead
projects = await nexus.list_projects()
logger.info(f"Total projects: {len(projects)}")
Listing Collections
Object-Oriented Style:
async def list_collections():
async with AsyncNexus() as nexus:
project = await nexus.get_project("my-project")
# List all collections
collections = await project.list_collections()
logger.info(f"Found {len(collections)} collection(s)")
for collection_info in collections:
logger.info(f" - {collection_info.name} ({collection_info.uuid})")
asyncio.run(list_collections())
Path-Based Style:
with Nexus() as nexus:
project = nexus.get("my-project")
collections = project.list_collections()
logger.info(f"Found {len(collections)} collection(s)")
for collection_info in collections:
logger.info(f" - {collection_info.name} ({collection_info.uuid})")
# Nexus automatically closes when exiting the with block
Iterating Collections
Object-Oriented Style:
async def iterate_collections():
async with AsyncNexus() as nexus:
project = await nexus.get_project("my-project")
# Iterate through all collections
async for collection_info in project.iter_collections():
logger.info(f"Processing collection: {collection_info.name}")
# Get full collection object if needed
collection = await project.get_collection(collection_info.name)
# Process collection
asyncio.run(iterate_collections())
Path-Based Style:
with Nexus() as nexus:
project = nexus.get("my-project")
for collection_info in project.iter_collections():
logger.info(f"Processing collection: {collection_info.name}")
# Nexus automatically closes when exiting the with block
Items
Listing Items
Object-Oriented Style:
async def list_items():
async with AsyncNexus() as nexus:
project = await nexus.get_project("my-project")
collection = await project.get_collection("my-collection")
# List items with pagination
response = await collection.list_items(
path="/",
page=1,
page_size=50
)
items = response.get("items", [])
pagination = response.get("pagination", {})
logger.info(f"Found {pagination.get('total_count', 0)} item(s)")
logger.info(f"Showing page {pagination.get('page', 1)} of {pagination.get('total_pages', 1)}")
for item in items:
logger.info(f" - {item.name} ({'folder' if item.is_folder else 'file'})")
asyncio.run(list_items())
Path-Based Style:
with Nexus() as nexus:
collection = nexus.get("my-project/my-collection")
response = collection.list_items(path="/", page=1, page_size=50)
items = response.get("items", [])
for item in items:
logger.info(f" - {item.name}")
# Nexus automatically closes when exiting the with block
Iterating Items
Object-Oriented Style:
async def iterate_items():
async with AsyncNexus() as nexus:
project = await nexus.get_project("my-project")
collection = await project.get_collection("my-collection")
# Iterate through all items (transparent pagination)
async for item in collection.iter_items(path="/", page_size=50):
logger.info(f"Processing item: {item.name}")
# Process each item
asyncio.run(iterate_items())
Path-Based Style:
with Nexus() as nexus:
collection = nexus.get("my-project/my-collection")
for item in collection.iter_items(path="/", page_size=50):
logger.info(f"Processing item: {item.name}")
# Nexus automatically closes when exiting the with block
Pagination with list_items()
Get paginated results
response = await collection.list_items(path="/", page=1, page_size=10)
items = response.get("items", [])
pagination = response.get("pagination", {})
Get next page
if has_next: next_page = await collection.list_items(path="/", page=2, page_size=10)
1. Use list_* for Small Datasets
When you know the dataset is small, use list_*:
2. Use iter_* for Large Datasets
For large datasets, use iter_* to avoid loading everything into memory:
3. Configure Page Size
Adjust page size based on your needs:
Large page size: Fewer requests, more memory
async for item in collection.iter_items(page_size=1000): process_item(item)
4. Use Prefetching for Better Performance
Prefetch pages for smoother iteration:
Complete Examples
Example 1: List All Resources
import asyncio
from miura.aio import AsyncNexus
from miura.logging import get_logger
logger = get_logger(__name__)
async def list_all_resources():
"""List all projects, collections, and items."""
async with AsyncNexus() as nexus:
# List projects
logger.info("=== Projects ===")
projects = await nexus.list_projects()
logger.info(f"Found {len(projects)} project(s)")
for project in projects:
logger.info(f"\nProject: {project.name}")
# List collections in project
collections = await project.list_collections()
logger.info(f" Collections: {len(collections)}")
for collection_info in collections:
logger.info(f" Collection: {collection_info.name}")
# List items in collection
collection = await project.get_collection(collection_info.name)
response = await collection.list_items(path="/", page=1, page_size=10)
items = response.get("items", [])
logger.info(f" Items: {len(items)} (showing first 10)")
asyncio.run(list_all_resources())
Example 2: Iterate Through All Resources
async def iterate_all_resources():
"""Iterate through all projects, collections, and items."""
async with AsyncNexus() as nexus:
# Iterate projects
async for project_info in nexus.iter_projects():
logger.info(f"Processing project: {project_info.name}")
project = await nexus.get_project(project_info.name)
# Iterate collections
async for collection_info in project.iter_collections():
logger.info(f" Processing collection: {collection_info.name}")
collection = await project.get_collection(collection_info.name)
# Iterate items
async for item in collection.iter_items(path="/"):
logger.info(f" Processing item: {item.name}")
asyncio.run(iterate_all_resources())
Example 3: Selective Processing
async def selective_processing():
"""Process only specific items."""
async with AsyncNexus() as nexus:
project = await nexus.get_project("my-project")
collection = await project.get_collection("my-collection")
# Iterate and filter
file_count = 0
async for item in collection.iter_items(path="/"):
# Only process files (not folders)
if not item.is_folder:
# Only process large files (> 1MB)
if item.file_size and item.file_size > 1_000_000:
logger.info(f"Found large file: {item.name} ({item.file_size:,} bytes)")
file_count += 1
# Process the file
bound_item = await collection.get_item(item.item_uri)
if bound_item:
result = await bound_item.download("./downloads/", confirm=False)
logger.info(f" Downloaded: {result.get('files_downloaded', 0)} file(s)")
logger.info(f"Processed {file_count} large files")
asyncio.run(selective_processing())
Next Steps
- Projects and Collections - Learn about navigation styles and managing projects
- Getting Items Tutorial - Retrieve specific items
- Downloading Items Tutorial - Download items
- End-to-End Example - Complete workflow
Related Documentation
- API Reference - Complete API documentation
- Quick Start Guide - Get started with the Nexus API
Getting Individual Items
Async API
Using the Async API (miura.aio):
import asyncio
from miura.aio import AsyncNexus
from miura.logging import get_logger
logger = get_logger(__name__)
async def main():
async with AsyncNexus() as nexus:
# Get a project and collection
project = await nexus.get_project("my-project")
collection = await project.get_collection("my-collection")
# Get a specific file
item = await collection.get_item("/data/parameters.dat")
if item:
logger.info(f"Found file: {item.name}")
logger.info(f" URI: {item.item_uri}")
logger.info(f" Size: {item.file_size:,} bytes")
else:
logger.warning("File not found")
asyncio.run(main())
Sync API
Using the Sync API (miura):
from miura import Nexus
from miura.logging import get_logger
logger = get_logger(__name__)
with Nexus() as nexus:
# Get a project and collection
project = nexus.get_project("my-project")
collection = project.get_collection("my-collection")
# Get a specific file
item = collection.get_item("/data/parameters.dat")
if item:
logger.info(f"Found file: {item.name}")
logger.info(f" URI: {item.item_uri}")
logger.info(f" Size: {item.file_size:,} bytes")
else:
logger.warning("File not found")
# Nexus automatically closes when exiting the with block
Getting Folders
Folders are retrieved the same way, but with a trailing slash:
Async API:
Get a folder (async)
folder = await collection.get_item("/models/")
if folder:
logger.info(f"Found folder: {folder.name}")
logger.info(f" URI: {folder.item_uri}")
logger.info(f" Has children: {folder.has_children}")
Sync API:
Nested Paths
Get items in nested folders:
Async API:
Get a nested folder
folder = await collection.get_item("/01/h5/")
if folder:
logger.info(f"Found nested folder: {folder.item_uri}")
Sync API:
Get a nested folder
folder = collection.get_item("/01/h5/")
if folder:
logger.info(f"Found nested folder: {folder.item_uri}")
Using Hierarchical Paths
Note: Hierarchical path navigation (nexus.get()) is available in the Sync API (miura). This is a convenient way to navigate directly to resources using paths.
Sync API (Path-Based Navigation):
from miura import Nexus
from miura.logging import get_logger
logger = get_logger(__name__)
with Nexus() as nexus:
# Get a file using hierarchical path
# Format: "project-name/collection-name/item-path"
item = nexus.get("my-project/my-collection/data/file.txt")
logger.info(f"Found item: {item.name}")
logger.info(f" URI: {item.item_uri}")
# Get a folder using hierarchical path
folder = nexus.get("my-project/my-collection/models/")
logger.info(f"Found folder: {folder.name}")
# Get a collection (just project/collection)
collection = nexus.get("my-project/my-collection")
logger.info(f"Collection: {collection.name}")
# Get a project (just project name)
project = nexus.get("my-project")
logger.info(f"Project: {project.name}")
# Nexus automatically closes when exiting the with block
Note: The Async API uses object-oriented navigation. To get an item in the async API, use:
Async API equivalent (object-oriented)
async with AsyncNexus() as nexus:
project = await nexus.get_project("my-project")
collection = await project.get_collection("my-collection")
item = await collection.get_item("/data/file.txt")
Benefits of Hierarchical Paths
- Convenience: Navigate directly to items without intermediate steps
- Readability: Clear, path-like syntax
- Flexibility: Works for projects, collections, and items
Accessing Item Properties
Once you have an item, you can access its properties:
item = await collection.get_item("/data/file.txt")
if item:
# Basic properties
logger.info(f"Name: {item.name}")
logger.info(f"URI: {item.item_uri}")
logger.info(f"UUID: {item.uuid}")
# File-specific properties
logger.info(f"File size: {item.file_size:,} bytes")
logger.info(f"Is folder: {item.is_folder}")
logger.info(f"Has children: {item.has_children}")
logger.info(f"Status: {item.status}")
# Metadata
if item.metadata:
logger.info(f"Metadata: {item.metadata}")
Property Reference
| Property | Type | Description |
|---|---|---|
| `name` | `str` | Item name (last part of path) |
| `item_uri` | `str` | Full path to the item |
| `uuid` | `str` | Unique identifier |
| `file_size` | `int` | File size in bytes (0 for folders) |
| `is_folder` | `bool` | Whether this is a folder |
| `has_children` | `bool` | Whether folder has children |
| `status` | `str` | Item status (e.g., "SUCCEEDED") |
| `metadata` | `dict` | Additional metadata |
Handling Missing Items
When an item doesn't exist, get_item() returns None:
item = await collection.get_item("/nonexistent/file.txt")
if item is None:
logger.warning("Item not found")
# Handle the missing item case
else:
# Work with the item
logger.info(f"Found: {item.name}")
Error Handling with Hierarchical Paths
When using nexus.get() with hierarchical paths, a ValueError is raised if the item is not found:
from miura import Nexus
from miura.logging import get_logger
logger = get_logger(__name__)
with Nexus() as nexus:
try:
item = nexus.get("my-project/my-collection/nonexistent/file.txt")
except ValueError as e:
logger.warning(f"Item not found: {e}")
# Handle the error
# Nexus automatically closes when exiting the with block
Complete Example
Here's a complete example that demonstrates getting items in various ways:
import asyncio
from miura.aio import AsyncNexus
from miura.logging import get_logger
logger = get_logger(__name__)
async def main():
async with AsyncNexus() as nexus:
# Get project and collection
project = await nexus.get_project("my-project")
collection = await project.get_collection("my-collection")
# Example 1: Get a root-level file
logger.info("=== Example 1: Root-level file ===")
item = await collection.get_item("/parameters.dat")
if item:
logger.info(f"Found: {item.name} ({item.file_size:,} bytes)")
# Example 2: Get a folder
logger.info("=== Example 2: Folder ===")
folder = await collection.get_item("/models/")
if folder:
logger.info(f"Found folder: {folder.name}")
logger.info(f" Has children: {folder.has_children}")
# Example 3: Get a nested file
logger.info("=== Example 3: Nested file ===")
nested_item = await collection.get_item("/01/h5/model.h5")
if nested_item:
logger.info(f"Found nested file: {nested_item.item_uri}")
# Example 4: Get a nested folder
logger.info("=== Example 4: Nested folder ===")
nested_folder = await collection.get_item("/01/h5/")
if nested_folder:
logger.info(f"Found nested folder: {nested_folder.item_uri}")
# Example 5: Handle missing item
logger.info("=== Example 5: Missing item ===")
missing = await collection.get_item("/nonexistent/file.txt")
if missing is None:
logger.warning("Item not found (as expected)")
if __name__ == "__main__": asyncio.run(main())
Next Steps
- Downloading Items Tutorial - Learn how to download specific files and folders
- Schema Generation Tutorial - Generate schemas automatically from your filesystem
- End-to-End Example - Complete workflow combining all features
Related Documentation
- API Reference - Complete API documentation
- Quick Start Guide - Get started with the Nexus API
Downloading Items
The BoundCollectionItem class provides a convenient download() method that allows you to download files and folders directly from collection items. This is especially useful when you've retrieved a specific item using get_item().
Key Concepts
- BoundCollectionItem: A wrapper around `CollectionItem` that includes a reference to its parent collection, enabling direct download operations
- Automatic Path Handling: The download method automatically handles file vs folder downloads
- Progress Tracking: Download operations provide status and progress information
Async API
Using the Async API (miura.aio):
import asyncio
from miura.aio import AsyncNexus
from miura.logging import get_logger
logger = get_logger(__name__)
async def main():
async with AsyncNexus() as nexus:
project = await nexus.get_project("my-project")
collection = await project.get_collection("my-collection")
# Get a specific file
item = await collection.get_item("/data/parameters.dat")
if item:
# Download the file
result = await item.download("./downloads/", confirm=False)
logger.info(f"Download status: {result['status']}")
logger.info(f"Files downloaded: {result.get('files_downloaded', 0)}")
logger.info(f"Total size: {result.get('downloaded_size', 0):,} bytes")
asyncio.run(main())
Sync API
Using the Sync API (miura):
from miura import Nexus
from miura.logging import get_logger
logger = get_logger(__name__)
with Nexus() as nexus:
project = nexus.get_project("my-project")
collection = project.get_collection("my-collection")
# Get a specific file
item = collection.get_item("/data/parameters.dat")
if item:
# Download the file
result = item.download("./downloads/", confirm=False)
logger.info(f"Download status: {result['status']}")
logger.info(f"Files downloaded: {result.get('files_downloaded', 0)}")
logger.info(f"Total size: {result.get('downloaded_size', 0):,} bytes")
# Nexus automatically closes when exiting the with block
Download to Specific Location
Async API:
Download to a specific directory (async)
item = await collection.get_item("/data/file.txt")
if item:
result = await item.download("./my-custom-path/", confirm=False)
logger.info(f"Downloaded to: {result.get('destination', 'unknown')}")
Sync API:
Download with Confirmation
Sync API:
Download with confirmation prompt (sync)
item = collection.get_item("/large-file.dat")
if item:
result = item.download("./downloads/", confirm=True)
# User will be prompted before download starts
Downloading Folders
Download entire folders and their contents:
Nested Folder Downloads
Download nested folder structures:
Using Hierarchical Paths
Note: Hierarchical path navigation (nexus.get()) is available in the Sync API (miura).
Sync API (Path-Based Navigation):
from miura import Nexus
from miura.logging import get_logger
logger = get_logger(__name__)
with Nexus() as nexus:
# Get and download a file using hierarchical path
item = nexus.get("my-project/my-collection/data/file.txt")
result = item.download("./downloads/", confirm=False)
logger.info(f"Downloaded: {result.get('files_downloaded', 0)} file(s)")
# Get and download a folder
folder = nexus.get("my-project/my-collection/models/")
result = folder.download("./downloads/models/", confirm=False)
logger.info(f"Downloaded: {result.get('files_downloaded', 0)} file(s)")
# Nexus automatically closes when exiting the with block
Note: In the Async API, use object-oriented navigation:
Async API equivalent (object-oriented)
async with AsyncNexus() as nexus:
project = await nexus.get_project("my-project")
collection = await project.get_collection("my-collection")
item = await collection.get_item("/data/file.txt")
if item:
result = await item.download("./downloads/", confirm=False)
Browse and Download Selectively
List items and download only specific ones:
Download only files (not folders)
for item in items:
if not item.is_folder:
# Get as bound item to enable download
bound_item = await collection.get_item(item.item_uri)
if bound_item:
logger.info(f"Downloading {item.name}...")
result = await bound_item.download("./downloads/selective/", confirm=False)
logger.info(f" Downloaded: {result.get('files_downloaded', 0)} file(s)")
Iterate and Download
Use iter_items() to process large collections:
download_count = 0
async for item in collection.iter_items(path="/", page_size=50):
# Only download files, limit to first 10 for demo
if not item.is_folder and download_count < 10:
bound_item = await collection.get_item(item.item_uri)
if bound_item:
logger.info(f"Downloading {item.name} ({item.file_size:,} bytes)...")
result = await bound_item.download("./downloads/iterated/", confirm=False)
download_count += 1
logger.info(f" Downloaded: {result.get('files_downloaded', 0)} file(s)")
Filter by Criteria
Download items based on specific criteria:
async for item in collection.iter_items(path="/"):
# Download only large files (> 1MB)
if not item.is_folder and item.file_size and item.file_size > 1_000_000:
bound_item = await collection.get_item(item.item_uri)
if bound_item:
logger.info(f"Downloading large file: {item.name}")
result = await bound_item.download("./downloads/large-files/", confirm=False)
Complete Example
Here's a complete example demonstrating various download scenarios:
import asyncio
from datetime import datetime
from miura.aio import AsyncNexus
from miura.logging import get_logger
logger = get_logger(__name__)
async def main():
async with AsyncNexus() as nexus:
project = await nexus.get_project("my-project")
collection = await project.get_collection("my-collection")
# Example 1: Download a specific file
logger.info("=== Example 1: Download specific file ===")
item = await collection.get_item("/parameters.dat")
if item:
result = await item.download("./downloads/", confirm=False)
logger.info(f"Downloaded: {result.get('files_downloaded', 0)} file(s)")
# Example 2: Download a folder
logger.info("=== Example 2: Download folder ===")
folder = await collection.get_item("/models/")
if folder:
result = await folder.download("./downloads/models/", confirm=False)
logger.info(f"Downloaded: {result.get('files_downloaded', 0)} file(s)")
# Example 3: Browse and download selectively
logger.info("=== Example 3: Selective download ===")
response = await collection.list_items(path="/", page=1, page_size=10)
items = response.get("items", [])
for item in items[:3]: # Download first 3 items
if not item.is_folder:
bound_item = await collection.get_item(item.item_uri)
if bound_item:
logger.info(f"Downloading {item.name}...")
result = await bound_item.download("./downloads/selective/", confirm=False)
logger.info(f" Status: {result['status']}")
# Example 4: Iterate and download
logger.info("=== Example 4: Iterate and download ===")
download_count = 0
async for item in collection.iter_items(path="/"):
if not item.is_folder and download_count < 2:
bound_item = await collection.get_item(item.item_uri)
if bound_item:
logger.info(f"Downloading {item.name}...")
result = await bound_item.download("./downloads/iterated/", confirm=False)
download_count += 1
if download_count >= 2:
break
if __name__ == "__main__": asyncio.run(main())
Download Result Structure
The download() method returns a dictionary with the following structure:
{
"status": "completed", # "completed", "failed", "partial"
"files_downloaded": 5, # Number of files successfully downloaded
"files_failed": 0, # Number of files that failed to download
"total_files": 5, # Total number of files processed
"downloaded_size": 1024000, # Total size downloaded in bytes
"destination": "./downloads/", # Destination directory
"errors": [] # List of error messages (if any)
}
Handling Download Results
result = item.download("./downloads/", confirm=False)
if result["status"] == "completed":
logger.info("Download completed successfully")
logger.info(f"Downloaded {result['files_downloaded']} file(s)")
elif result["status"] == "partial":
logger.warning("Download partially completed")
logger.warning(f"Failed: {result['files_failed']} file(s)")
if result.get("errors"):
for error in result["errors"]:
logger.error(f"Error: {error}")
elif result["status"] == "failed":
logger.error("Download failed")
if result.get("errors"):
for error in result["errors"]:
logger.error(f"Error: {error}")
Best Practices
1. Check Item Existence
Always check if an item exists before downloading:
item = await collection.get_item("/data/file.txt")
if item:
result = await item.download("./downloads/", confirm=False)
else:
logger.warning("Item not found, skipping download")
2. Use Appropriate Paths
Use trailing slashes for folders, no trailing slash for files:
File
item = await collection.get_item("/data/file.txt")
3. Handle Large Downloads
For large downloads, use confirm=True or provide user feedback:
item = await collection.get_item("/large-file.dat")
if item and item.file_size:
size_mb = item.file_size / (1024 * 1024)
logger.info(f"Preparing to download {size_mb:.2f} MB file")
result = await item.download("./downloads/", confirm=True)
4. Organize Downloads
Use descriptive paths to organize downloaded files:
from datetime import datetime
Organize by date
date_str = datetime.now().strftime("%Y%m%d")
result = item.download(f"./downloads/{date_str}/", confirm=False)
Organize by item type
if item.is_folder:
result = item.download("./downloads/folders/", confirm=False)
else:
result = item.download("./downloads/files/", confirm=False)
Next Steps
- Uploading Data - Upload files and folders to collections
- Schemas - Learn about schemas and schema generation
- End-to-End Workflows - Complete workflow combining all features
Related Documentation
- API Reference - Complete API documentation
- Quick Start Guide - Get started with the Nexus API