Python google.cloud.storage.Client() Examples

The following are 30 code examples that show how to use google.cloud.storage.Client(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module google.cloud.storage, or try the search function.
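Before the project examples, here is a minimal sketch of the question in the page title: reading a Cloud Storage object through a file-like interface. It is not taken from the projects below; the bucket and object names are placeholders, default application credentials are assumed, and Blob.open() is only available in reasonably recent releases of google-cloud-storage.

import io

from google.cloud import storage

client = storage.Client()
bucket = client.bucket("my-bucket")          # placeholder bucket name
blob = bucket.blob("path/to/object.csv")     # placeholder object name

# Approach 1: download the object into an in-memory buffer and use the
# buffer as a file-like object. download_to_file() accepts any writable
# file-like object.
buffer = io.BytesIO()
blob.download_to_file(buffer)
buffer.seek(0)
first_line = buffer.readline()

# Approach 2: newer releases of google-cloud-storage expose Blob.open(),
# which returns a readable file-like handle that streams the object.
with blob.open("rb") as f:
    header = f.readline()

The BytesIO approach keeps the whole object in memory, while Blob.open() reads it in chunks, which matters for large objects.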

def _compose_files(project, bucket_name, blob_names, composite_name):
    # type: (str, str, List[str], str) -> None
    """Composes multiple files (up to 32 objects) in GCS to one.

    Args:
        project: The project name.
        bucket_name: The name of the bucket where the `components` and the new
            composite are saved.
        blob_names: A list of blob object names.
        composite_name: Name of the new composite.
    """
    bucket = storage.Client(project).get_bucket(bucket_name)
    output_file_blob = bucket.blob(composite_name)
    output_file_blob.content_type = 'text/plain'
    blobs = [bucket.get_blob(blob_name) for blob_name in blob_names]
    output_file_blob.compose(blobs)
def __init__(self, project, bucket_name, blob_prefix):
    # type: (str, str, str) -> None
    """Initializes a `MultiProcessComposer`.

    This class composes all blobs that start with `blob_prefix` to one.

    Args:
        project: The project name.
        bucket_name: The name of the bucket where the blob components and the
            new composite are saved.
        blob_prefix: The prefix used to filter blobs. Only the blobs with this
            prefix will be composed.
    """
    self._project = project
    self._bucket_name = bucket_name
    self._blob_prefix = blob_prefix
    self._bucket = storage.Client(project).get_bucket(bucket_name)
def from_config(cls, config, creds=None):
    """Returns an initialized CloudStorageAPI object.

    Args:
        config: common.ProjectConfig, the project configuration.
        creds: auth.CloudCredentials, the credentials to use for client
            authentication.

    Returns:
        An authenticated CloudStorageAPI instance.
    """
    if creds is None:
        creds = auth.CloudCredentials(config, cls.SCOPES)
    client = storage.Client(
        project=config.project,
        credentials=creds.get_credentials(cls.SCOPES))
    return cls(config, client)
def download_from_bucket(file_path):
    """Downloads file from Google Cloud Storage bucket."""
    path_segments = file_path[5:].split("/")  # strip the leading "gs://"
    storage_client = storage.Client()
    bucket_name = path_segments[0]
    bucket = storage_client.get_bucket(bucket_name)
    source = "/".join(path_segments[1:])
    blob = bucket.blob(source)
    destination = "/tmp/" + source.replace("/", "%2f")
    blob.download_to_filename(destination)
    print(f"{file_path} downloaded to {destination}.")
    return destination
def _upload_to_gcs(self, gcs_project_id, target_bucket_name, bucket_folder, filename):
    '''upload CSV to file in GCS

    Args:
        gcs_project_id (str): project name
        target_bucket_name (str): name of GCS bucket
        bucket_folder (str): name of GCS folder
        filename (str): filepath to upload

    Returns:
        nothing. Side effect is that data is uploaded to GCS
    '''
    storage_client = storage.Client(gcs_project_id)
    bucket = storage_client.get_bucket(target_bucket_name)
    path = bucket_folder + os.sep + filename
    logging.info("Loading to GCS: %s", path)
    blob = bucket.blob(path)  # name in GCS
    blob.upload_from_filename(filename)
def cost_usage_source_is_reachable(self, credential_name, data_source):
    """
    Verify that the GCP bucket exists and is reachable.

    Args:
        credential_name (object): not used; only present for interface compatibility
        data_source (dict): dict containing name of GCP storage bucket

    """
    storage_client = storage.Client()
    bucket = data_source["bucket"]
    try:
        bucket_info = storage_client.lookup_bucket(bucket)
        if not bucket_info:
            # if the lookup does not return anything, then this is a nonexistent bucket
            key = "billing_source.bucket"
            message = f"The provided GCP bucket {bucket} does not exist"
            raise serializers.ValidationError(error_obj(key, message))
    except GoogleCloudError as e:
        key = "billing_source.bucket"
        raise serializers.ValidationError(error_obj(key, e.message))
    return True
def __init__(self, task, customer_name, billing_source, **kwargs):
    """
    Constructor.

    Args:
        task (Object): bound celery object
        customer_name (str): Name of the customer
        billing_source (dict): dict containing name of GCP storage bucket

    """
    super().__init__(task, **kwargs)
    self.bucket_name = billing_source["bucket"]
    self.report_prefix = billing_source.get("report_prefix", "")
    self.customer_name = customer_name.replace(" ", "_")
    self._provider_uuid = kwargs.get("provider_uuid")
    try:
        GCPProvider().cost_usage_source_is_reachable(None, billing_source)
        self._storage_client = storage.Client()
        self._bucket_info = self._storage_client.lookup_bucket(self.bucket_name)
    except ValidationError as ex:
        msg = (
            f"GCP bucket {self.bucket_name} for customer {customer_name} "
            f"is not reachable. Error: {str(ex)}"
        )
        LOG.error(log_json(self.request_id, msg, self.context))
        raise GCPReportDownloaderError(str(ex))
def upload_to_gcs(filenames, gcs_bucket_path):
    """Upload wave file to GCS, at provided path."""
    path_parts = gcs_bucket_path[5:].split('/', 1)
    bucket_name = path_parts[0]
    if len(path_parts) == 1:
        key_prefix = ''
    elif path_parts[1].endswith('/'):
        key_prefix = path_parts[1]
    else:
        key_prefix = path_parts[1] + '/'

    client = storage.Client(project=os.environ["PROJECT_NAME"])
    bucket = client.get_bucket(bucket_name)

    def _upload_files(filenames):
        """Upload a list of files into a specific subdirectory."""
        for i, filename in enumerate(filenames):
            blob = bucket.blob(key_prefix + os.path.basename(filename))
            blob.upload_from_filename(filename)
            if not i % 5:
                tf.logging.info('Finished uploading file: %s' % filename)

    _upload_files(filenames)
def _upload_to_gcp_storage(exec_dir):
    """Upload all files to GCP storage under exec_dir folder.

    Args:
        exec_dir (str): The execution directory.
    """
    _bucket = storage.Client().bucket('resl-garage-benchmarks')
    exec_name = os.path.basename(exec_dir)
    for folder_name in os.listdir(exec_dir):
        folder_path = os.path.join(exec_dir, folder_name)
        if not os.path.isfile(folder_path):
            remote_folder = os.path.join(exec_name, folder_name)
            for file_name in os.listdir(folder_path):
                file_path = os.path.join(folder_path, file_name)
                if os.path.isfile(file_path):
                    blob = _bucket.blob(os.path.join(remote_folder, file_name))
                    blob.upload_from_filename(file_path)
def init_gcs():
    is_user_secrets_token_set = "KAGGLE_USER_SECRETS_TOKEN" in os.environ
    from google.cloud import storage
    if not is_user_secrets_token_set:
        return storage

    from kaggle_gcp import get_integrations
    if not get_integrations().has_gcs():
        return storage

    from kaggle_secrets import GcpTarget
    from kaggle_gcp import KaggleKernelCredentials
    monkeypatch_client(
        storage.Client,
        KaggleKernelCredentials(target=GcpTarget.GCS))
    return storage
def maybe_upload_file(local_path):
    '''Upload a file to remote cloud storage if the path starts with gs:// or s3://'''
    if local_path.startswith(('s3://', 'gs://')):
        prefix = local_path.split(':')[0]
        remote_bucket_path = local_path[len("s3://"):]  # same length
        bp = remote_bucket_path.split("/")
        bucket = bp[0]
        path = remote_bucket_path[1 + len(bucket):]

        # s3://example/file becomes s3:/example/file in Linux
        local_path = prefix + ':/' + remote_bucket_path

        if prefix == 's3':
            import boto3
            s3 = boto3.client('s3', endpoint_url=os.environ.get('S3_ENDPOINT'))
            s3.upload_file(local_path, bucket, path)
        elif prefix == 'gs':
            from google.cloud import storage
            client = storage.Client()
            Hbucket = storage.Bucket(client, bucket)
            blob = storage.Blob(path, Hbucket)
            blob.upload_from_filename(local_path)
def __init__(self, fn):
    self.fn = fn
    if fn.startswith('gs://'):
        from google.cloud import storage
        self.s3client = None
        self.gclient = storage.Client()
        self.storage_dir = TemporaryDirectory()
        self.writer = tf.python_io.TFRecordWriter(
            os.path.join(self.storage_dir.name, 'temp.tfrecord'))
        self.bucket_name, self.file_name = self.fn.split(
            'gs://', 1)[1].split('/', 1)
    else:
        self.s3client = None
        self.gclient = None
        self.bucket_name = None
        self.file_name = None
        self.storage_dir = None
        self.writer = tf.python_io.TFRecordWriter(fn)
def gs_download_file(path):
    if path is None:
        return None

    parsed_path = urlparse(path)
    bucket_name = parsed_path.netloc
    file_path = parsed_path.path[1:]
    try:
        gs_client = storage.Client()
        bucket = gs_client.get_bucket(bucket_name)
    except exceptions.DefaultCredentialsError:
        logger.info('Switching to anonymous google storage client')
        gs_client = storage.Client.create_anonymous_client()
        bucket = gs_client.bucket(bucket_name, user_project=None)
    blob = bucket.blob(file_path)
    tmp_path = os.path.join('/tmp', file_path.split(os.sep)[-1])
    blob.download_to_filename(tmp_path)
    return tmp_path
def get_local_file(source_path):
    parsed_path = urlparse(source_path)
    if parsed_path.scheme == "gs":
        bucket_name = parsed_path.netloc
        file_path = parsed_path.path[1:]
        file_name = os.path.split(parsed_path.path)[1]
        try:
            gs_client = storage.Client()
            bucket = gs_client.get_bucket(bucket_name)
        except exceptions.DefaultCredentialsError:
            # if credentials fail, try to connect as anonymous user
            gs_client = storage.Client.create_anonymous_client()
            bucket = gs_client.bucket(bucket_name, user_project=None)
        blob = bucket.blob(file_path)
        blob.download_to_filename(file_name)
    elif parsed_path.scheme == "":
        # in case of local path just pass the input argument
        if os.path.isfile(source_path):
            file_name = source_path
        else:
            print("file " + source_path + " is not accessible")
            file_name = ""
    return file_name
def upload_file(source_file, target_folder):
    parsed_path = urlparse(target_folder)
    if parsed_path.scheme == "gs":
        bucket_name = parsed_path.netloc
        folder_path = parsed_path.path[1:]
        try:
            gs_client = storage.Client()
            bucket = gs_client.get_bucket(bucket_name)
            blob = bucket.blob(folder_path + "/" + source_file)
            blob.upload_from_filename(source_file)
        except Exception as er:
            print(er)
            return False
    elif parsed_path.scheme == "":
        if target_folder != ".":
            copy(source_file, target_folder)
    return True
def list_blobs(bucket_name):
    """Lists all the blobs in the bucket."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blobs = bucket.list_blobs()
    return blobs
def __init__(self, project_id, bucket_name):
    """Initialize client with project id and name of the storage bucket."""
    self.project_id = project_id
    self.bucket_name = bucket_name
    self.client = storage.Client(project=project_id)
    self.bucket = self.client.get_bucket(bucket_name)
def __init__(self, project_id, namespace=None):
    """Init this method with given project id and optional namespace."""
    self._client = datastore.Client(project=project_id, namespace=namespace)
def compose_gcs_vcf_shards(project,  # type: str
                           vcf_header_file_path,  # type: str
                           vcf_data_files_folder,  # type: str
                           output_file,  # type: str
                           delete=False,  # type: bool
                          ):
    # type: (...) -> None
    """Composes VCF shards in GCS to one VCF file.

    It composes VCF header and VCF data files to one VCF file, and deletes the
    original VCF shards if `delete` is True.

    Args:
        project: The project name.
        vcf_header_file_path: The path of the VCF header file, it contains the
            meta information, as well as the data header line with the sample
            names.
        vcf_data_files_folder: The folder that contains all VCF data files.
        output_file: The final VCF file path.
        delete: If true, delete the original VCF shards.
    """
    header_bucket_name, header_blob = gcsio.parse_gcs_path(vcf_header_file_path)
    vcf_data_bucket_name, vcf_data_blob_prefix = gcsio.parse_gcs_path(
        vcf_data_files_folder)
    if vcf_data_bucket_name != header_bucket_name:
        raise ValueError('The VCF data files {} and header file {} are in '
                         'different buckets. '.format(vcf_data_files_folder,
                                                      vcf_header_file_path))

    composed_vcf_data_blob = _compose_vcf_data_files(project,
                                                     vcf_data_files_folder)
    client = storage.Client(project)
    bucket = client.get_bucket(vcf_data_bucket_name)
    output_file_blob = _create_blob(client, output_file)
    output_file_blob.compose([bucket.get_blob(header_blob),
                              composed_vcf_data_blob])
    if delete:
        bucket.delete_blobs(bucket.list_blobs(prefix=vcf_data_blob_prefix))
        bucket.delete_blobs(bucket.list_blobs(prefix=header_blob))
def _create_blob(client, file_path):
    # type: (storage.Client, str) -> storage.Blob
    bucket_name, blob_name = gcsio.parse_gcs_path(file_path)
    file_blob = client.get_bucket(bucket_name).blob(blob_name)
    file_blob.content_type = 'text/plain'
    return file_blob
def validate_result(self):
    """Validates the results.

    - Checks that the report is generated.
    - Validates report's contents are the same as `expected_contents`.
    - Checks that the resolved headers are generated if `header_blob_name` is
      specified in the test.
    """
    client = storage.Client(self._project)
    bucket = client.get_bucket(_BUCKET_NAME)
    report_blob = bucket.get_blob(self._report_blob_name)
    if not report_blob:
        raise run_tests_common.TestCaseFailure(
            'Report is not generated in {} in test {}'.format(
                self._report_path, self._name))
    contents = report_blob.download_as_string()
    expected_contents = '\n'.join(self._expected_contents)
    if expected_contents != contents:
        raise run_tests_common.TestCaseFailure(
            'Contents mismatch: expected {}, got {} in test {}'.format(
                expected_contents, contents, self._name))
    if not self._keep_reports:
        report_blob.delete()

    if self._header_blob_name:
        resolved_headers_blob = bucket.get_blob(self._header_blob_name)
        if not resolved_headers_blob:
            raise run_tests_common.TestCaseFailure(
                'The resolved header is not generated in {} in test {}'.format(
                    self._header_path, self._name))
        if not self._keep_reports:
            resolved_headers_blob.delete()
def __init__(self, fn):
    self.fn = fn
    if fn.startswith('s3://'):
        from boto3.s3.transfer import TransferConfig
        import boto3
        self.gclient = None
        self.s3client = boto3.client('s3')
        self.storage_dir = TemporaryDirectory()
        self.writer = tf.python_io.TFRecordWriter(
            os.path.join(self.storage_dir.name, 'temp.tfrecord'))
        self.bucket_name, self.file_name = self.fn.split('s3://', 1)[1].split('/', 1)
    elif fn.startswith('gs://'):
        from google.cloud import storage
        self.s3client = None
        self.gclient = storage.Client()
        self.storage_dir = TemporaryDirectory()
        self.writer = tf.python_io.TFRecordWriter(
            os.path.join(self.storage_dir.name, 'temp.tfrecord'))
        self.bucket_name, self.file_name = self.fn.split('gs://', 1)[1].split('/', 1)
    else:
        self.s3client = None
        self.gclient = None
        self.bucket_name = None
        self.file_name = None
        self.storage_dir = None
        self.writer = tf.python_io.TFRecordWriter(fn)
def __init__(self, gcloud_name):
    assert gcloud_name.startswith('gs://')
    self.gcloud_name = gcloud_name
    bucket_name, blob_name = gcloud_name.split('gs://')[1].split('/', 1)
    bucket = storage.Client().get_bucket(bucket_name)
    self.blob = bucket.blob(blob_name)
def mark_done(gspath):
    """Uploads a file to the bucket to indicate completion of the training job.

    gspath is a path to the output directory of training such as
    gs://$PROJECT-model-output/$MODEL_NAME/$MODEL_VERSION/output
    """
    url = urlparse(gspath)
    if url.scheme != "gs":
        raise RuntimeError("not a Google Storage URL")
    bucket_name = url.netloc
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(url.path.strip("/") + "/TRAINER-DONE")
    blob.upload_from_string("done")
def __init__(self, root_dir):
    try:
        from google.cloud import storage
    except ImportError:
        print('GoogleStorage backend requires the package '
              '"google-cloud-storage", execute '
              '"pip install google-cloud-storage" to install it.')

    self.client = storage.Client()
    bucket_str = root_dir[5:].split('/')[0]
    self.bucket = self.client.get_bucket(bucket_str)
    self.folder_str = root_dir[6 + len(bucket_str):]
    if self.folder_str[0] == '/':
        self.folder_str = self.folder_str[1:]
def get_blob(url, client=None):
    if client is None:
        client = storage.Client()
    bucket_name, path = parse_url(url)
    bucket = client.get_bucket(bucket_name)
    return bucket.get_blob(path)
def upload_contents(url, contents, client=None):
    """Given a gs:// path, uploads `contents` to the corresponding blob."""
    if client is None:
        client = storage.Client()
    bucket_name, path = parse_url(url)
    bucket = client.get_bucket(bucket_name)
    blob = storage.Blob(path, bucket)
    blob.upload_from_string(contents)
def download_data():
    """Download the data from Google Cloud Storage."""
    # Load the Dataset from the public GCS bucket
    bucket = storage.Client().bucket('cloud-samples-data')
    # Path to the data inside the public bucket
    blob = bucket.blob('ml-engine/sonar/sonar.all-data')
    # Download the data
    blob.download_to_filename('sonar.all-data')
def save_model(model_dir, model_name):
    """Saves the model to Google Cloud Storage."""
    bucket = storage.Client().bucket(model_dir)
    blob = bucket.blob('{}/{}'.format(
        datetime.datetime.now().strftime('sonar_%Y%m%d_%H%M%S'),
        model_name))
    blob.upload_from_filename(model_name)