How to get the bucket name and key from S3 URLs in Python
If you work a lot with S3 URLs like this one:
# Example S3 URL: "some-bucket-name" is the bucket, and everything after the
# following "/" ("dataset-name/some-folder") is the object key.
url = "s3://some-bucket-name/dataset-name/some-folder"
Then this small helper class can come in handy. I found it on Stack Overflow:
from urllib.parse import urlparse
class S3Url(object):
    """Split an ``s3://bucket/key`` URL into its bucket name and object key.

    The URL is parsed once in the constructor with ``allow_fragments=False``,
    so a ``#`` is not treated as a fragment separator and stays part of the
    key text.

    Example:
        >>> s = S3Url("s3://bucket/hello/world")
        >>> s.bucket
        'bucket'
        >>> s.key
        'hello/world'
    """

    # From: https://stackoverflow.com/questions/42641315/s3-urls-get-bucket-name-and-path
    def __init__(self, url):
        """Parse *url* (a string such as ``"s3://bucket/some/key"``)."""
        self._parsed = urlparse(url, allow_fragments=False)

    @property
    def bucket(self):
        """Return the bucket name (the network-location part of the URL)."""
        return self._parsed.netloc

    @property
    def key(self):
        """Return the object key: everything after ``s3://<bucket>/``.

        A ``?query`` part, if present, is re-attached because ``?`` is
        ordinary key text here rather than a query separator.
        """
        path = self._parsed.path
        # Drop only the single "/" that separates the bucket from the key.
        # Stripping every leading slash would corrupt keys that themselves
        # start with "/" (e.g. "s3://bucket//a" refers to the key "/a").
        if path.startswith("/"):
            path = path[1:]
        if self._parsed.query:
            return path + "?" + self._parsed.query
        return path

    @property
    def url(self):
        """Return the URL reassembled from its parsed components."""
        return self._parsed.geturl()
Works like a charm!
# Print the bucket and key of two sample URLs. Expected output:
#   'bucket', 'hello/world'
#   'bucket', 'hello/world?qwe1=3#ddd'
for s in map(
    S3Url,
    (
        "s3://bucket/hello/world",
        "s3://bucket/hello/world?qwe1=3#ddd",
    ),
):
    print(s.bucket)
    print(s.key)
Comments