Skip to content

rg.Workspace

In Argilla, workspaces are used to organize datasets into groups. For example, you might have a workspace for each project or team.

Usage Examples

To create a new workspace, instantiate the Workspace object with a name (the default client is used unless one is passed via the client argument) and call create():

workspace = rg.Workspace(name="my_workspace")
workspace.create()

To retrieve an existing workspace, use the client.workspaces attribute:

workspace = client.workspaces("my_workspace")

File Operations

List Files

List files in a workspace.

# Get a workspace reference
workspace = client.workspaces(name="my-workspace")

# List files with optional parameters
files = workspace.list_files(path="", recursive=True, include_version=True)

# Access the files
for file in files.objects:
    print(f"File: {file.object_name}, Size: {file.size}, Last Modified: {file.last_modified}")

Get File

Get a file from a workspace.

workspace = client.workspaces(name="my-workspace")
file_response = workspace.get_file(path="path/to/file.txt", version_id=None)

# Access the file content
content = file_response.content

# Access the file metadata
metadata = file_response.metadata
print(f"Content Type: {metadata.content_type}, ETag: {metadata.etag}")

Upload File

Upload a file to a workspace.

workspace = client.workspaces(name="my-workspace")
file_metadata = workspace.put_file(path="path/to/store/file.txt", file_path="/local/path/to/file.txt")

print(f"File uploaded: {file_metadata.object_name}")

Delete File

Delete a file from a workspace.

workspace = client.workspaces(name="my-workspace")
workspace.delete_file(path="path/to/file.txt", version_id=None)

Document Operations

Add Document

Add a document to a workspace.

workspace = client.workspaces(name="my-workspace")

# Add a document with a URL
document_id = workspace.add_document(url="https://example.com/document.pdf")

# Add a document with a PMID
document_id = workspace.add_document(pmid="PMC12345")

# Add a document with a DOI
document_id = workspace.add_document(doi="10.1234/example")

# Add a document with a file
document_id = workspace.add_document(file_path="/local/path/to/document.pdf")

Get Documents

Get documents from a workspace.

workspace = client.workspaces(name="my-workspace")
documents = workspace.get_documents()

# Access the documents
for doc in documents:
    print(f"Document ID: {doc.id}, URL: {doc.url}, PMID: {doc.pmid}, DOI: {doc.doi}")

Schema Operations

Get Schemas

Get schemas from a workspace.

# Get a workspace reference
workspace = client.workspaces(name="my-workspace")

# Workspace exposes schema retrieval as `list_schemas`; there is no `get_schemas` method
schemas = workspace.list_schemas(prefix="schemas/", exclude=None)

# Access the schemas
for schema in schemas.schemas:
    print(f"Schema: {schema.name}")
    print(f"Columns: {list(schema.columns.keys())}")

Add Schema

Add a schema to a workspace.

import pandera as pa

# Create a schema
schema = pa.DataFrameSchema(
    name="my_schema",
    columns={
        "text": pa.Column(pa.String),
        "label": pa.Column(pa.String),
        "score": pa.Column(pa.Float, nullable=True),
    },
)

# Add the schema to the workspace
workspace = client.workspaces(name="my-workspace")
workspace.add_schema(schema, prefix="schemas/")

Update Schemas

Update schemas in a workspace.

import pandera as pa
from extralit.extraction.models import SchemaStructure

# Create schemas
schema1 = pa.DataFrameSchema(
    name="schema1",
    columns={
        "text": pa.Column(pa.String),
        "label": pa.Column(pa.String),
    },
)

schema2 = pa.DataFrameSchema(
    name="schema2",
    columns={
        "text": pa.Column(pa.String),
        "score": pa.Column(pa.Float),
    },
)

# Create a schema structure
schemas = SchemaStructure(schemas=[schema1, schema2])

# Update schemas in the workspace
workspace = client.workspaces(name="my-workspace")
result = workspace.update_schemas(schemas, check_existing=True, prefix="schemas/")

print(f"Updated {len(result.objects)} schemas")

Error Handling

All API methods include proper error handling. If an error occurs, an exception will be raised with a descriptive error message.

try:
    workspace = client.workspaces(name="non-existent-workspace")
    files = workspace.list_files("")
except Exception as e:
    print(f"Error: {str(e)}")

Workspace

Bases: Resource

Class for interacting with Argilla workspaces. Workspaces are used to organize datasets in the Argilla server.

Attributes:

Name Type Description
name str

The name of the workspace.

id UUID

The ID of the workspace. This is a unique identifier for the workspace in the server.

datasets List[Dataset]

A list of all datasets in the workspace.

users WorkspaceUsers

A list of all users in the workspace.

Source code in src/argilla/workspaces/_resource.py
class Workspace(Resource):
    """Client-side handle for an Argilla workspace, the unit used to group datasets on the server.

    Attributes:
        name (str): The name of the workspace.
        id (UUID): The ID of the workspace. This is a unique identifier for the workspace in the server.
        datasets (List[Dataset]): All datasets that belong to the workspace.
        users (WorkspaceUsers): The users that belong to the workspace.
    """

    name: Optional[str]

    _api: "WorkspacesAPI"

    def __init__(
        self,
        name: Optional[str] = None,
        id: Optional["UUID"] = None,
        client: Optional["Argilla"] = None,
    ) -> None:
        """Set up a Workspace from a name and/or id, bound to a client.

        Parameters:
            name (str): The name of the workspace
            id (UUID): The id of the workspace. If provided before a .create, the workspace will be created with this ID
            client (Argilla): The client used to interact with Argilla. When omitted (or falsy),
                the default client returned by ``Argilla._get_default()`` is used.
        """
        if not client:
            client = Argilla._get_default()
        super().__init__(client=client, api=client.api.workspaces)

        self._model = WorkspaceModel(name=name, id=id)

    def add_user(self, user: Union["User", str]) -> "User":
        """Add a user to the workspace so that it gains access to the datasets in the workspace.

        Args:
            user (Union[User, str]): The user to add to the workspace. Can be a User object or a username.

        Returns:
            User: The user that was added to the workspace
        """
        members = self.users
        return members.add(user)

    def remove_user(self, user: Union["User", str]) -> "User":
        """Remove a user from the workspace. After removal, the user will no longer have access
        to the datasets in the workspace.

        Args:
            user (Union[User, str]): The user to remove from the workspace. Can be a User object or a username.

        Returns:
            User: The user that was removed from the workspace.
        """
        members = self.users
        return members.delete(user)

    # TODO: Make this method private
    def list_datasets(self) -> List["Dataset"]:
        """List the datasets of this workspace through the client's datasets API.

        Returns:
            List[Dataset]: One Dataset resource per dataset model returned for this workspace id.
        """
        from argilla.datasets import Dataset

        dataset_models = self._client.api.datasets.list(self.id)
        self._log_message(f"Got {len(dataset_models)} datasets for workspace {self.id}")

        # Wrap each raw model in a Dataset resource bound to the same client
        return [Dataset.from_model(model=dataset_model, client=self._client) for dataset_model in dataset_models]

    ####################
    # File methods #
    ####################

    def list_files(self, path: str, recursive: bool = True, include_version: bool = True) -> "ListObjectsResponse":
        """List the files stored under a path of the workspace.

        Args:
            path: The path to list files from.
            recursive: Whether to list files recursively.
            include_version: Whether to include version information.

        Returns:
            The listing returned by the workspace API for this workspace's name.
        """
        api = self._api
        return api.list_files(self.name, path, recursive, include_version)

    def get_file(self, path: str, version_id: Optional[str] = None) -> "FileObjectResponse":
        """Fetch a single file from the workspace.

        Args:
            path: The path of the file.
            version_id: The version ID of the file.

        Returns:
            The file content and metadata.
        """
        api = self._api
        return api.get_file(self.name, path, version_id)

    def put_file(self, path: str, file_path: Union[str, Path]) -> "ObjectMetadata":
        """Upload a local file to the workspace.

        Args:
            path: The path to store the file.
            file_path: The local path of the file to upload. A string is converted to a ``Path``.

        Returns:
            The metadata of the uploaded file.
        """
        # Only plain strings are converted; any other value is forwarded untouched
        local_path = Path(file_path) if isinstance(file_path, str) else file_path
        return self._api.put_file(self.name, path, local_path)

    def delete_file(self, path: str, version_id: Optional[str] = None) -> None:
        """Delete a file from the workspace.

        Args:
            path: The path of the file to delete.
            version_id: The version ID of the file.
        """
        api = self._api
        api.delete_file(self.name, path, version_id)

    ####################
    # Document methods #
    ####################

    def add_document(
        self,
        file_path: Optional[str] = None,
        url: Optional[str] = None,
        pmid: Optional[str] = None,
        doi: Optional[str] = None,
    ) -> "UUID":
        """Register a document in the workspace.

        Args:
            file_path: The local path of the file to upload.
            url: The URL of the document.
            pmid: The PMID of the document.
            doi: The DOI of the document.

        Returns:
            The ID of the added document.
        """
        from argilla._models._documents import Document

        new_document = Document(
            workspace_id=self.id,
            file_path=file_path,
            url=url,
            pmid=pmid,
            doi=doi,
        )
        document_id = self._api.add_document(new_document)
        return document_id

    def get_documents(self) -> List["Document"]:
        """Get the documents registered in the workspace.

        Returns:
            A list of documents.
        """
        api = self._api
        return api.get_documents(self.id)

    ####################
    # Schema methods #
    ####################

    def list_schemas(
        self, prefix: str = DEFAULT_SCHEMA_S3_PATH, exclude: Optional[List[str]] = None
    ) -> "SchemaStructure":
        """Get the schemas stored in the workspace.

        Args:
            prefix: The prefix to filter schemas.
            exclude: List of schema names to exclude.

        Returns:
            A SchemaStructure containing the schemas.
        """
        api = self._api
        return api.list_schemas(self.name, prefix, exclude)

    def add_schema(self, schema: Any, prefix: str = DEFAULT_SCHEMA_S3_PATH) -> None:
        """Add a single schema to the workspace.

        Args:
            schema: The schema to add.
            prefix: The prefix to store the schema.

        Returns:
            Whatever the workspace API's ``add_schema`` returns is passed through unchanged.
        """
        api = self._api
        outcome = api.add_schema(self.name, schema, prefix)
        return outcome

    def update_schemas(
        self, schemas: Any, check_existing: bool = True, prefix: str = DEFAULT_SCHEMA_S3_PATH
    ) -> "ListObjectsResponse":
        """Update a collection of schemas in the workspace.

        Args:
            schemas: The schemas to update.
            check_existing: Whether to check if the schema already exists.
            prefix: The prefix to store the schemas.

        Returns:
            A list of updated schema files.
        """
        api = self._api
        return api.update_schemas(self.name, schemas, check_existing, prefix)

    @classmethod
    def from_model(cls, model: WorkspaceModel, client: Argilla) -> "Workspace":
        """Build a Workspace around an existing WorkspaceModel.

        Args:
            model: The model whose name and id seed the new instance; it also becomes the instance's model.
            client: The client the new workspace is bound to.

        Returns:
            Workspace: The workspace backed by ``model``.
        """
        workspace = cls(name=model.name, id=model.id, client=client)
        # Replace the model built by __init__ with the one that was handed in
        workspace._model = model
        return workspace

    ############################
    # Properties
    ############################

    @property
    def name(self) -> Optional[str]:
        """The workspace name, read from the underlying model."""
        return self._model.name

    @name.setter
    def name(self, value: str) -> None:
        self._model.name = value

    @property
    def datasets(self) -> List["Dataset"]:
        """List all datasets in the workspace

        Returns:
            List[Dataset]: A list of all datasets in the workspace
        """
        return self.list_datasets()

    @property
    def users(self) -> "WorkspaceUsers":
        """List all users in the workspace

        Returns:
            WorkspaceUsers: A list of all users in the workspace
        """
        return WorkspaceUsers(workspace=self)
datasets property

List all datasets in the workspace

Returns:

Type Description
List[Dataset]

List[Dataset]: A list of all datasets in the workspace

users property

List all users in the workspace

Returns:

Name Type Description
WorkspaceUsers WorkspaceUsers

A list of all users in the workspace

__init__(name=None, id=None, client=None)

Initializes a Workspace object with a client and a name or id

Parameters:

Name Type Description Default
name str

The name of the workspace

None
id UUID

The id of the workspace. If provided before a .create, the workspace will be created with this ID

None
client Argilla

The client used to interact with Argilla

None

Returns:

Name Type Description
Workspace None

The initialized workspace object

Source code in src/argilla/workspaces/_resource.py
def __init__(
    self,
    name: Optional[str] = None,
    id: Optional["UUID"] = None,
    client: Optional["Argilla"] = None,
) -> None:
    """Set up a Workspace from a name and/or id, bound to a client.

    Parameters:
        name (str): The name of the workspace
        id (UUID): The id of the workspace. If provided before a .create, the workspace will be created with this ID
        client (Argilla): The client used to interact with Argilla. When omitted (or falsy),
            the default client returned by ``Argilla._get_default()`` is used.
    """
    if not client:
        client = Argilla._get_default()
    super().__init__(client=client, api=client.api.workspaces)

    self._model = WorkspaceModel(name=name, id=id)
add_user(user)

Adds a user to the workspace. After adding a user to the workspace, it will have access to the datasets in the workspace.

Parameters:

Name Type Description Default
user Union[User, str]

The user to add to the workspace. Can be a User object or a username.

required

Returns:

Name Type Description
User User

The user that was added to the workspace

Source code in src/argilla/workspaces/_resource.py
def add_user(self, user: Union["User", str]) -> "User":
    """Add a user to the workspace so that it gains access to the datasets in the workspace.

    Args:
        user (Union[User, str]): The user to add to the workspace. Can be a User object or a username.

    Returns:
        User: The user that was added to the workspace
    """
    members = self.users
    return members.add(user)
remove_user(user)

Removes a user from the workspace. After removing a user from the workspace, it will no longer have access to the datasets in the workspace.

Parameters:

Name Type Description Default
user Union[User, str]

The user to remove from the workspace. Can be a User object or a username.

required

Returns:

Name Type Description
User User

The user that was removed from the workspace.

Source code in src/argilla/workspaces/_resource.py
def remove_user(self, user: Union["User", str]) -> "User":
    """Remove a user from the workspace. After removal, the user will no longer have access
    to the datasets in the workspace.

    Args:
        user (Union[User, str]): The user to remove from the workspace. Can be a User object or a username.

    Returns:
        User: The user that was removed from the workspace.
    """
    members = self.users
    return members.delete(user)
list_files(path, recursive=True, include_version=True)

List files in the workspace.

Parameters:

Name Type Description Default
path str

The path to list files from.

required
recursive bool

Whether to list files recursively.

True
include_version bool

Whether to include version information.

True

Returns:

Type Description
ListObjectsResponse

A list of files.

Source code in src/argilla/workspaces/_resource.py
def list_files(self, path: str, recursive: bool = True, include_version: bool = True) -> "ListObjectsResponse":
    """List the files stored under a path of the workspace.

    Args:
        path: The path to list files from.
        recursive: Whether to list files recursively.
        include_version: Whether to include version information.

    Returns:
        The listing returned by the workspace API for this workspace's name.
    """
    api = self._api
    return api.list_files(self.name, path, recursive, include_version)
get_file(path, version_id=None)

Get a file from the workspace.

Parameters:

Name Type Description Default
path str

The path of the file.

required
version_id Optional[str]

The version ID of the file.

None

Returns:

Type Description
FileObjectResponse

The file content and metadata.

Source code in src/argilla/workspaces/_resource.py
def get_file(self, path: str, version_id: Optional[str] = None) -> "FileObjectResponse":
    """Fetch a single file from the workspace.

    Args:
        path: The path of the file.
        version_id: The version ID of the file.

    Returns:
        The file content and metadata.
    """
    api = self._api
    return api.get_file(self.name, path, version_id)
put_file(path, file_path)

Upload a file to the workspace.

Parameters:

Name Type Description Default
path str

The path to store the file.

required
file_path Union[str, Path]

The local path of the file to upload.

required

Returns:

Type Description
ObjectMetadata

The metadata of the uploaded file.

Source code in src/argilla/workspaces/_resource.py
def put_file(self, path: str, file_path: Union[str, Path]) -> "ObjectMetadata":
    """Upload a local file to the workspace.

    Args:
        path: The path to store the file.
        file_path: The local path of the file to upload. A string is converted to a ``Path``.

    Returns:
        The metadata of the uploaded file.
    """
    # Only plain strings are converted; any other value is forwarded untouched
    local_path = Path(file_path) if isinstance(file_path, str) else file_path
    return self._api.put_file(self.name, path, local_path)
delete_file(path, version_id=None)

Delete a file from the workspace.

Parameters:

Name Type Description Default
path str

The path of the file to delete.

required
version_id Optional[str]

The version ID of the file.

None
Source code in src/argilla/workspaces/_resource.py
def delete_file(self, path: str, version_id: Optional[str] = None) -> None:
    """Delete a file from the workspace.

    The result of the underlying API call is discarded.

    Args:
        path: The path of the file to delete.
        version_id: The version ID of the file.
    """
    api = self._api
    api.delete_file(self.name, path, version_id)
add_document(file_path=None, url=None, pmid=None, doi=None)

Add a document to the workspace.

Parameters:

Name Type Description Default
file_path Optional[str]

The local path of the file to upload.

None
url Optional[str]

The URL of the document.

None
pmid Optional[str]

The PMID of the document.

None
doi Optional[str]

The DOI of the document.

None

Returns:

Type Description
UUID

The ID of the added document.

Source code in src/argilla/workspaces/_resource.py
def add_document(
    self,
    file_path: Optional[str] = None,
    url: Optional[str] = None,
    pmid: Optional[str] = None,
    doi: Optional[str] = None,
) -> "UUID":
    """Register a document in the workspace.

    Args:
        file_path: The local path of the file to upload.
        url: The URL of the document.
        pmid: The PMID of the document.
        doi: The DOI of the document.

    Returns:
        The ID of the added document.
    """
    from argilla._models._documents import Document

    new_document = Document(
        workspace_id=self.id,
        file_path=file_path,
        url=url,
        pmid=pmid,
        doi=doi,
    )
    document_id = self._api.add_document(new_document)
    return document_id
get_documents()

Get documents from the workspace.

Returns:

Type Description
List[Document]

A list of documents.

Source code in src/argilla/workspaces/_resource.py
def get_documents(self) -> List["Document"]:
    """Get the documents registered in the workspace.

    Returns:
        A list of documents.
    """
    api = self._api
    return api.get_documents(self.id)
list_schemas(prefix=DEFAULT_SCHEMA_S3_PATH, exclude=None)

Get schemas from the workspace.

Parameters:

Name Type Description Default
prefix str

The prefix to filter schemas.

DEFAULT_SCHEMA_S3_PATH
exclude Optional[List[str]]

List of schema names to exclude.

None

Returns:

Type Description
SchemaStructure

A SchemaStructure containing the schemas.

Source code in src/argilla/workspaces/_resource.py
def list_schemas(
    self, prefix: str = DEFAULT_SCHEMA_S3_PATH, exclude: Optional[List[str]] = None
) -> "SchemaStructure":
    """Get the schemas stored in the workspace.

    Args:
        prefix: The prefix to filter schemas.
        exclude: List of schema names to exclude.

    Returns:
        A SchemaStructure containing the schemas.
    """
    api = self._api
    return api.list_schemas(self.name, prefix, exclude)
add_schema(schema, prefix=DEFAULT_SCHEMA_S3_PATH)

Add a schema to the workspace.

Parameters:

Name Type Description Default
schema Any

The schema to add.

required
prefix str

The prefix to store the schema.

DEFAULT_SCHEMA_S3_PATH
Source code in src/argilla/workspaces/_resource.py
def add_schema(self, schema: Any, prefix: str = DEFAULT_SCHEMA_S3_PATH) -> None:
    """Add a single schema to the workspace.

    Args:
        schema: The schema to add.
        prefix: The prefix to store the schema.

    Returns:
        Whatever the workspace API's ``add_schema`` returns is passed through unchanged.
    """
    api = self._api
    outcome = api.add_schema(self.name, schema, prefix)
    return outcome
update_schemas(schemas, check_existing=True, prefix=DEFAULT_SCHEMA_S3_PATH)

Update schemas in the workspace.

Parameters:

Name Type Description Default
schemas Any

The schemas to update.

required
check_existing bool

Whether to check if the schema already exists.

True
prefix str

The prefix to store the schemas.

DEFAULT_SCHEMA_S3_PATH

Returns:

Type Description
ListObjectsResponse

A list of updated schema files.

Source code in src/argilla/workspaces/_resource.py
def update_schemas(
    self, schemas: Any, check_existing: bool = True, prefix: str = DEFAULT_SCHEMA_S3_PATH
) -> "ListObjectsResponse":
    """Update a collection of schemas in the workspace.

    Args:
        schemas: The schemas to update.
        check_existing: Whether to check if the schema already exists.
        prefix: The prefix to store the schemas.

    Returns:
        A list of updated schema files.
    """
    api = self._api
    return api.update_schemas(self.name, schemas, check_existing, prefix)