less than 1 minute read

If you work a lot with S3 URLs like this one:

# Example S3 URL: the bucket name comes right after "s3://", the rest is the object key.
url = "s3://some-bucket-name/dataset-name/some-folder"

Then this small helper class can come in handy. I found it on Stack Overflow:

from urllib.parse import urlparse 
class S3Url:
    """Split an ``s3://bucket/key`` URL into its bucket name and object key.

    The URL is parsed with ``allow_fragments=False``, so a ``#`` is never
    split off as a fragment and stays part of the key.
    """

    # From: https://stackoverflow.com/questions/42641315/s3-urls-get-bucket-name-and-path
    def __init__(self, url):
        """Parse *url* (e.g. ``"s3://bucket/some/key"``) once and keep the result."""
        self._parsed = urlparse(url, allow_fragments=False)

    def __repr__(self):
        return "{}({!r})".format(type(self).__name__, self.url)

    @property
    def bucket(self):
        """Bucket name, i.e. the network-location part of the URL."""
        return self._parsed.netloc

    @property
    def key(self):
        """Object key: everything after the slash that follows the bucket.

        A ``?query`` part is treated as belonging to the key and is
        re-attached to it.
        """
        path = self._parsed.path
        # Remove only the single "/" that separates bucket and key. Keys may
        # themselves start with "/", so stripping every leading slash would
        # return the wrong key for e.g. "s3://bucket//data".
        key = path[1:] if path.startswith("/") else path
        if self._parsed.query:
            key += "?" + self._parsed.query
        return key

    @property
    def url(self):
        """The URL reassembled from its parsed components."""
        return self._parsed.geturl()

Works like a charm!

# Parse each sample URL and print its bucket followed by its key.
for example in ("s3://bucket/hello/world", "s3://bucket/hello/world?qwe1=3#ddd"):
    s = S3Url(example)
    print(s.bucket)  # 'bucket' for both URLs
    print(s.key)  # 'hello/world', then 'hello/world?qwe1=3#ddd'

Subscribe

Comments