Source code for geobox.file

from urllib.parse import urljoin
from typing import Optional, Dict, List, Union, TYPE_CHECKING
import os
import mimetypes
from geobox.exception import ValidationError
import requests
import sys

from .base import Base
from .enums import FileFormat, PublishFileType, InputGeomType, FileType
from .utils import clean_data, get_unique_filename, get_save_path
from .task import Task
from .feature import Feature

if TYPE_CHECKING:
    from . import GeoboxClient 
    from .user import User


[docs] class File(Base): BASE_ENDPOINT: str = 'files/'
[docs] def __init__(self, api: 'GeoboxClient', uuid: str, data: Optional[Dict] = {}): """ Constructs all the necessary attributes for the File object. Args: api (GeoboxClient): The GeoboxClient instance. uuid (str): The UUID of the file. data (Dict, optional): The data of the file. """ super().__init__(api, uuid=uuid, data=data)
[docs] def __repr__(self) -> str: """ Return a string representation of the File object. Returns: str: A string representation of the File object. """ return f"File(uuid={self.uuid}, file_name={self.name}, file_type={self.file_type.value})"
@property def layers(self) -> List[Dict]: """ Get the layers of the file. Returns: List[Dict]: The layers of the file. Example: >>> from geobox import GeoboxClient >>> client = GeoboxClient() >>> file = File.get_file(client, uuid="12345678-1234-5678-1234-567812345678") >>> file.layers """ return self.data.get('layers', {}).get('layers', []) @property def file_type(self) -> 'FileType': """ Get the file type Returns: FileType: the file type enumeration Example: >>> from geobox import GeoboxClient >>> client = GeoboxClient() >>> file = File.get_file(client, uuid="12345678-1234-5678-1234-567812345678") >>> file.file_type """ return FileType(self.data.get('file_type'))
[docs] @classmethod def upload_file(cls, api: 'GeoboxClient', path: str, user_id: int = None, scan_archive: bool = True) -> 'File': """ Upload a file to the GeoBox API. Args: api (GeoboxClient): The GeoboxClient instance. path (str): The path to the file to upload. user_id (int, optional): specific user. privileges required. scan_archive (bool, optional): Whether to scan the archive for layers. default: True Returns: File: The uploaded file instance. Raises: ValueError: If the file type is invalid. FileNotFoundError: If the file does not exist. Example: >>> from geobox import GeoboxClient >>> from geobox.file import File >>> client = GeoboxClient() >>> file = File.upload_file(client, path='path/to/file.shp') or >>> file = client.upload_file(path='path/to/file.shp') """ # Check if the file exists if not os.path.exists(path): raise FileNotFoundError(f"File not found: {path}") # Check if the file type is valid FileFormat(os.path.splitext(path)[1]) data = clean_data({ "user_id": user_id, "scan_archive": scan_archive }) endpoint = cls.BASE_ENDPOINT with open(path, 'rb') as f: files = {'file': f} file_data = api.post(endpoint, data, is_json=False, files=files) return cls(api, file_data['uuid'], file_data)
[docs] @classmethod def get_files(cls, api:'GeoboxClient', **kwargs) -> Union[List['File'], int]: """ Retrieves a list of files. Args: api (GeoboxClient): The GeoboxClient instance for making requests. Keyword Args: q (str): query filter based on OGC CQL standard. e.g. "field1 LIKE '%GIS%' AND created_at > '2021-01-01'" search (str): search term for keyword-based searching among search_fields or all textual fields if search_fields does not have value. NOTE: if q param is defined this param will be ignored. search_fields (str): comma separated list of fields for searching order_by (str): comma separated list of fields for sorting results [field1 A|D, field2 A|D, …]. e.g. name A, type D. NOTE: "A" denotes ascending order and "D" denotes descending order. return_count (bool): if true, the total number of results will be returned. default is False. skip (int): number of results to skip. default is 0. limit (int): number of results to return. default is 10. user_id (int): filter by user id. shared (bool): Whether to return shared files. default is False. Returns: List[File] | int: A list of File objects or the total number of results. Example: >>> from geobox import GeoboxClient >>> from geobox.file import File >>> client = GeoboxClient() >>> files = File.get_files(client, search_fields='name', search='GIS', order_by='name', skip=10, limit=10) or >>> files = client.get_files(search_fields='name', search='GIS', order_by='name', skip=10, limit=10) """ params = { 'f': 'json', 'q': kwargs.get('q', None), 'search': kwargs.get('search', None), 'search_fields': kwargs.get('search_fields', None), 'order_by': kwargs.get('order_by', None), 'return_count': kwargs.get('return_count', False), 'skip': kwargs.get('skip', 0), 'limit': kwargs.get('limit', 10), 'user_id': kwargs.get('user_id', None), 'shared': kwargs.get('shared', False) } return super()._get_list(api, cls.BASE_ENDPOINT, params, factory_func=lambda api, item: cls(api, item['uuid'], item))
[docs] @classmethod def get_file(cls, api: 'GeoboxClient', uuid: str, user_id: int = None) -> 'File': """ Retrieves a file by its UUID. Args: api (Api): The GeoboxClient instance. uuid (str): The UUID of the file. user_id (int, optional): specific user. privileges required. Returns: File: The retrieved file instance. Raises: NotFoundError: If the file with the specified UUID is not found. Example: >>> from geobox import GeoboxClient >>> from geobox.file import File >>> client = GeoboxClient() >>> file = File.get_file(client, uuid="12345678-1234-5678-1234-567812345678") or >>> file = client.get_file(uuid="12345678-1234-5678-1234-567812345678") """ params = { 'f': 'json', 'user_id': user_id } return super()._get_detail(api, cls.BASE_ENDPOINT, f'{uuid}/info', params, factory_func=lambda api, item: File(api, item['uuid'], item))
[docs] @classmethod def get_files_by_name(cls, api: 'GeoboxClient', name: str, user_id: int = None) -> List['File']: """ Get files by name Args: api (GeoboxClient): The GeoboxClient instance for making requests. name (str): the name of the file to get user_id (int, optional): specific user. privileges required. Returns: List[File]: returns files that matches the given name Example: >>> from geobox import GeoboxClient >>> from geobox.file import File >>> client = GeoboxClient() >>> files = File.get_files_by_name(client, name='test') or >>> files = client.get_files_by_name(name='test') """ return cls.get_files(api, q=f"name = '{name}'", user_id=user_id)
[docs] def _get_file_name(self, response: requests.Response) -> str: """ Get the file name from the response. Args: response (requests.Response): The response of the request. Returns: str: The file name """ if 'Content-Disposition' in response.headers and 'filename=' in response.headers['Content-Disposition']: file_name = response.headers['Content-Disposition'].split('filename=')[-1].strip().strip('"') else: content_type = response.headers.get("Content-Type", "") file_name = f'{self.name}.{mimetypes.guess_extension(content_type.split(";")[0])}' return file_name
[docs] def _create_progress_bar(self) -> 'tqdm': """Creates a progress bar for the task.""" try: from tqdm.auto import tqdm except ImportError: from .api import logger logger.warning("[tqdm] extra is required to show the progress bar. install with: pip insatll geobox[tqdm]") return None return tqdm(unit="B", total=int(self.size), file=sys.stdout, dynamic_ncols=True, desc="Downloading", unit_scale=True, unit_divisor=1024, ascii=True )
[docs] def download(self, save_path: str = None, progress_bar: bool = True, file_name: str = None, overwrite: bool = False) -> str: """ Download a file and save it to the specified path. Args: save_path (str, optional): Path where the file should be saved. If not provided, it saves to the current working directory using the original filename and appropriate extension. progress_bar (bool, optional): Whether to show a progress bar. default: True file_name (str, optional): the downloaded file name. overwrite (bool, optional): whether to overwrite the downloaded file if it exists on the save path. default is False. Returns: str: Path where the file was saved Raises: ValueError: If uuid is not set OSError: If there are issues with file operations Example: >>> from geobox import GeoboxClient >>> from geobox.file import File >>> client = GeoboxClient() >>> file = File.get_file(client, uuid="12345678-1234-5678-1234-567812345678") >>> file.download(save_path='path/to/save/') """ if not self.uuid: raise ValueError("File UUID is required to download the file") save_path = get_save_path(save_path) os.makedirs(os.path.dirname(save_path), exist_ok=True) with self.api.get(f"{self.endpoint}download/", stream=True) as response: file_name = self._get_file_name(response) if not file_name else file_name full_path = f"{save_path}/{file_name}" if os.path.exists(full_path) and not overwrite: full_path = get_unique_filename(save_path, file_name) with open(full_path, 'wb') as f: pbar = self._create_progress_bar() if progress_bar else None for chunk in response.iter_content(chunk_size=8192): f.write(chunk) if pbar: pbar.update(len(chunk)) pbar.refresh() if pbar: pbar.close() return os.path.abspath(full_path)
[docs] def delete(self) -> None: """ Deletes the file. Returns: None Example: >>> from geobox import GeoboxClient >>> from geobox.file import File >>> client = GeoboxClient() >>> file = File.get_file(client, uuid="12345678-1234-5678-1234-567812345678") >>> file.delete() """ return super()._delete(self.endpoint)
[docs] def publish(self, name: str, publish_as: 'PublishFileType' = None, input_geom_type: 'InputGeomType' = None, input_layer: str = None, input_dataset: str = None, user_id: int = None, input_srid: int = Feature.BASE_SRID, file_encoding: str = "UTF-8", replace_domain_codes_by_values: bool = False, report_errors: bool = True, as_terrain: bool = False) -> 'Task': """ Publishes a file as a layer. Args: name (str): The name of the layer. publish_as (PublishFileType, optional): The type of layer to publish as. input_geom_type (InputGeomType, optional): The geometry type of the layer. input_layer (str, optional): The name of the input layer. input_dataset (str, optional): The name of the input dataset. user_id (int, optional): Specific user. privileges required. input_srid (int, optional): The SRID of the layer. default is: 3857 file_encoding (str, optional): The encoding of the file. default is "utf-8". replace_domain_codes_by_values (bool, optional): Whether to replace domain codes by values. default is False. report_errors (bool, optional): Whether to report errors. default is True. as_terrain (bool, optional): Whether to publish as terrain. default is False. Returns: Task: The task object. Raises: ValueError: If the publish_as is not a valid PublishFileType. ValidationError: if the zipped file doesn't have any layers to publish. Example: >>> from geobox import GeoboxClient >>> from geobox.file import File >>> client = GeoboxClient() >>> file = File.get_file(client, uuid="12345678-1234-5678-1234-567812345678") >>> file.publish(publish_as=PublishFileType.VECTOR, ... layer_name='my_layer', ... input_geom_type=InputGeomType.POINT, ... input_layer='layer1', ... input_dataset='dataset1', ... input_srid=4326, ... file_encoding='UTF-8') """ if not publish_as: # checks the file format or file first layer format to dynamically set the publish_as if self.file_type.value in ['GeoJSON', 'GPKG', 'DXF', 'GPX', 'Shapefile', 'KML', 'CSV', 'FileGDB'] or \ (self.file_type.value in ['Complex'] and self.layers and \ FileType(self.layers[0]['format']).value in ['GeoJSON', 'GPKG', 'DXF', 'GPX', 'Shapefile', 'KML', 'CSV', 'FileGDB']): publish_as = PublishFileType.VECTOR elif self.file_type.value in ['GeoTIFF'] or \ (self.file_type.value in ['Complex'] and self.layers and \ FileType(self.layers[0]['format']).value in ['GeoTIFF']): publish_as = PublishFileType.RASTER elif self.file_type.value in ['GLB'] or \ (self.file_type.value in ['Complex'] and self.layers and \ FileType(self.layers[0]['format']).value in ['GLB']): publish_as = PublishFileType.MODEL3D elif self.file_type.value in ['ThreedTiles']: publish_as = PublishFileType.Tiles3D else: raise ValidationError('Unknown format') data = clean_data({ "publish_as": publish_as.value if isinstance(publish_as, PublishFileType) else publish_as, "layer_name": name, "input_layer": self.layers[0]['layer'] if not input_layer and self.layers else input_layer, "input_geom_type": input_geom_type.value if isinstance(input_geom_type, InputGeomType) else input_geom_type, "replace_domain_codes_by_values": replace_domain_codes_by_values, "input_dataset": self.layers[0]['dataset'] if not input_layer and self.layers else input_dataset, "user_id": user_id, "input_srid": input_srid, "file_encoding": file_encoding, "report_errors": report_errors, "as_terrain": as_terrain }) endpoint = urljoin(self.endpoint, 'publish/') response = self.api.post(endpoint, data, is_json=False) task = Task.get_task(self.api, response.get('task_id')) return task
[docs] def share(self, users: List['User']) -> None: """ Shares the file with specified users. Args: users (List[User]): The list of users objects to share the file with. Returns: None Example: >>> from geobox import GeoboxClient >>> from geobox.file import File >>> client = GeoboxClient() >>> file = File.get_file(client, uuid="12345678-1234-5678-1234-567812345678") >>> users = client.search_users(search='John') >>> file.share(users=users) """ super()._share(self.endpoint, users)
[docs] def unshare(self, users: List['User']) -> None: """ Unshares the file with specified users. Args: users (List[User]): The list of users objects to unshare the file with. Returns: None Example: >>> from geobox import GeoboxClient >>> from geobox.file import File >>> client = GeoboxClient() >>> file = File.get_file(client, uuid="12345678-1234-5678-1234-567812345678") >>> users = client.search_users(search='John') >>> file.unshare(users=users) """ super()._unshare(self.endpoint, users)
[docs] def get_shared_users(self, search: str = None, skip: int = 0, limit: int = 10) -> List['User']: """ Retrieves the list of users the file is shared with. Args: search (str, optional): The search query. skip (int, optional): The number of users to skip. limit (int, optional): The maximum number of users to retrieve. Returns: List[User]: The list of shared users. Example: >>> from geobox import GeoboxClient >>> from geobox.file import File >>> client = GeoboxClient() >>> file = File.get_file(client, uuid="12345678-1234-5678-1234-567812345678") >>> file.get_shared_users(search='John', skip=0, limit=10) """ params = { 'search': search, 'skip': skip, 'limit': limit } return super()._get_shared_users(self.endpoint, params)