Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Init GcsClient only when Pandas Dataframe is passed + Reformat import statements
  • Loading branch information
TrucHLe committed Aug 28, 2019
commit 9190485cfe578b120b0b57df7dd1a8fc50a5fea9
17 changes: 9 additions & 8 deletions automl/google/cloud/automl_v1beta1/tables/gcs_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@

"""Wraps the Google Cloud Storage client library for use in tables helper."""

import google
import time

from google.api_core import exceptions

try:
import pandas
except ImportError: # pragma: NO COVER
Expand All @@ -31,7 +32,7 @@

_PANDAS_REQUIRED = "pandas is required to verify type DataFrame."
_STORAGE_REQUIRED = (
"google.cloud.storage is required to create Google Cloud Storage client."
"google-cloud-storage is required to create Google Cloud Storage client."
)


Expand Down Expand Up @@ -66,8 +67,8 @@ def ensure_bucket_exists(self, project, region):
Creates this bucket if it doesn't exist.

Args:
project (string): The project that stores the bucket.
region (string): The region of the bucket.
project (str): The project that stores the bucket.
region (str): The region of the bucket.

Returns:
A string representing the created bucket name.
Expand All @@ -76,18 +77,18 @@ def ensure_bucket_exists(self, project, region):

try:
self.client.get_bucket(bucket_name)
except google.cloud.exceptions.NotFound:
bucket = self.client.Bucket(bucket_name)
except exceptions.NotFound:
bucket = self.client.bucket(bucket_name)
bucket.create(project=project, location=region)
Comment thread
TrucHLe marked this conversation as resolved.
return bucket_name

def upload_pandas_dataframe(self, bucket_name, dataframe, uploaded_csv_name=None):
"""Uploads a Pandas DataFrame as CSV to the bucket.

Args:
bucket_name (string): The bucket name to upload the CSV to.
bucket_name (str): The bucket name to upload the CSV to.
dataframe (pandas.DataFrame): The Pandas Dataframe to be uploaded.
uploaded_csv_name (Optional[string]): The name for the uploaded CSV.
uploaded_csv_name (Optional[str]): The name for the uploaded CSV.

Returns:
A string representing the GCS URI of the uploaded CSV.
Expand Down
6 changes: 2 additions & 4 deletions automl/google/cloud/automl_v1beta1/tables/tables_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,9 +755,8 @@ def import_data(

request = {}

self.__ensure_gcs_client_is_initialized(credentials)

if pandas_dataframe is not None:
self.__ensure_gcs_client_is_initialized(credentials)
bucket_name = self.gcs_client.ensure_bucket_exists(project, region)
gcs_input_uri = self.gcs_client.upload_pandas_dataframe(
bucket_name, pandas_dataframe
Expand Down Expand Up @@ -2749,9 +2748,8 @@ def batch_predict(

input_request = None

self.__ensure_gcs_client_is_initialized(credentials)

if pandas_dataframe is not None:
self.__ensure_gcs_client_is_initialized(credentials)
bucket_name = self.gcs_client.ensure_bucket_exists(project, region)
gcs_input_uri = self.gcs_client.upload_pandas_dataframe(
bucket_name, pandas_dataframe
Expand Down
3 changes: 2 additions & 1 deletion automl/noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def default(session):
for local_dep in LOCAL_DEPS:
session.install("-e", local_dep)
session.install("-e", ".")
session.install("-e", ".[all]")
session.install("-e", ".[pandas,storage]")

# Run py.test against the unit tests.
session.run(
Expand Down Expand Up @@ -118,6 +118,7 @@ def system(session):
session.install("-e", local_dep)
session.install("-e", "../test_utils/")
session.install("-e", ".")
session.install("-e", ".[pandas,storage]")

# Run py.test against the system tests.
if system_test_exists:
Expand Down
7 changes: 0 additions & 7 deletions automl/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,6 @@
"storage": ["google-cloud-storage >= 1.18.0, < 2.0.0dev"],
}

all_extras = []

for extra in extras:
all_extras.extend(extras[extra])

extras["all"] = all_extras

package_root = os.path.abspath(os.path.dirname(__file__))

readme_filename = os.path.join(package_root, "README.rst")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def test_import_pandas_dataframe(self):
client = automl_v1beta1.TablesClient(project=PROJECT, region=REGION)
display_name = _id("t_import_pandas")
dataset = client.create_dataset(display_name)
dataframe = pandas.DataFrame({})
dataframe = pandas.DataFrame({"test-col1": [1, 2], "test-col2": [3, 4]})
op = client.import_data(
project=PROJECT, dataset=dataset, pandas_dataframe=dataframe
)
Expand Down
9 changes: 4 additions & 5 deletions automl/tests/unit/gapic/v1beta1/test_gcs_client_v1beta1.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

"""Unit tests."""

import google
import mock
import pandas
import pytest
Expand All @@ -35,8 +34,8 @@ def test_ensure_bucket_exists(self):
mock_bucket = mock.Mock()
gcs_client = self.gcs_client(
{
"get_bucket.side_effect": google.cloud.exceptions.NotFound("err"),
"Bucket.return_value": mock_bucket,
"get_bucket.side_effect": exceptions.NotFound("err"),
"bucket.return_value": mock_bucket,
}
)
returned_bucket_name = gcs_client.ensure_bucket_exists(
Expand All @@ -45,7 +44,7 @@ def test_ensure_bucket_exists(self):
gcs_client.client.get_bucket.assert_called_with(
"my-project-automl-tables-staging"
)
gcs_client.client.Bucket.assert_called_with("my-project-automl-tables-staging")
gcs_client.client.bucket.assert_called_with("my-project-automl-tables-staging")
mock_bucket.create.assert_called_with(
project="my-project", location="us-central1"
)
Expand All @@ -60,7 +59,7 @@ def test_ensure_bucket_exists_bucket_already_exists(self):
gcs_client.client.get_bucket.assert_called_with(
"my-project-automl-tables-staging"
)
gcs_client.client.Bucket.assert_not_called()
gcs_client.client.bucket.assert_not_called()
assert returned_bucket_name == "my-project-automl-tables-staging"

def test_upload_pandas_dataframe(self):
Expand Down