...
Reference on how to integrate Bitbucket with Databricks: Bitbucket integration with Databricks [DEPRECATED]
ALWAYS CREATE NEW BRANCHES FROM THE DEVELOP BRANCH.
All of our branches must be one of the four types below.
...
Code Block
from pyspark.sql.functions import *

host = ""
user = ""
password = ""
port = ""
dbDatabase = ""
driver = "com.mysql.jdbc.Driver"
url = "jdbc:mysql://" + host + ":" + port + "/" + dbDatabase

def read_table_mysql(sqlQuery):
    try:
        df = (spark.read.format("jdbc")
              .option("driver", driver)
              .option("url", url)
              .option("query", sqlQuery)
              .option("user", user)
              .option("password", password)
              .load())
        return df
    except Exception as ex:
        print("❌ Error reading the query")
        print(ex)
ref: https://docs.databricks.com/external-data/mysql.html
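A minimal usage sketch of the helper above, assuming the connection variables (host, user, password, port, dbDatabase) have been filled in; the table name "orders" and the query are hypothetical examples.

Code Block
# Example call of read_table_mysql defined above (table name is illustrative)
df_orders = read_table_mysql("SELECT id, customer_id, total FROM orders")
if df_orders is not None:
    display(df_orders)        # Databricks notebook preview
    df_orders.printSchema()   # inspect the inferred schema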
How to access a PostgreSQL database
Code Block
from pyspark.sql.functions import *

host = ""
dbname = ""
user = ""
password = ""
port = 5432

bucket_name = dbutils.secrets.get(scope='PROCESS_ETL_ANALYTICKS_DATABRICKS_API', key='BUCKET_TEMP_DATA_TRANSFER')
aws_access = dbutils.secrets.get(scope='AWS_SVC_S3_DATA_TRANSFER', key='ACCESS_KEY')
aws_secret = dbutils.secrets.get(scope='AWS_SVC_S3_DATA_TRANSFER', key='SECRET_KEY')

sc._jsc.hadoopConfiguration().set("fs.s3a.access.key", aws_access)
sc._jsc.hadoopConfiguration().set("fs.s3a.secret.key", aws_secret)

conn_string = f'jdbc:postgresql://{host}:{port}/{dbname}?user={user}&password={password}'

def query_postgres(select, nome_tabela):
    try:
        # temporary S3 directory for staging data
        tempdir = f"s3a://{bucket_name}/Redshift/PRODUTOS-DIGITAIS/{nome_tabela}"
        df = (spark.read.format("jdbc")
              .option("url", conn_string)
              .option("forward_spark_s3_credentials", "true")
              .option("query", select)
              .option("tempdir", tempdir)
              .option("driver", "org.postgresql.Driver")
              .load())
        return df
    except Exception as ex:
        print("❌ Error reading the query")
        print(ex)
ref: https://docs.databricks.com/external-data/postgresql.html
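As with MySQL, a hedged usage sketch of query_postgres defined above; the query and the "orders" table name are placeholders, and the function returns None if the read fails.

Code Block
# Example call (assumes host, dbname, user, password and the secret scopes above are configured)
select = "SELECT id, status FROM orders"
df_pg = query_postgres(select, "orders")
if df_pg is not None:
    print(df_pg.count())   # quick sanity check on the number of rows returned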
How to access a SQL Server database
Code Block
# DBTITLE 1,SQL Server configuration
host = dbutils.secrets.get(scope='DB_ETIQUETA_SQL_SERVER', key='HOST')
database = dbutils.secrets.get(scope='DB_ETIQUETA_SQL_SERVER', key='DATABASE')
password = dbutils.secrets.get(scope='DB_ETIQUETA_SQL_SERVER', key='PASS')
user = dbutils.secrets.get(scope='DB_ETIQUETA_SQL_SERVER', key='USER')

server_name = f"jdbc:sqlserver://{host}:1433"
url = server_name + ";databaseName=" + database + ";encrypt=true;trustServerCertificate=true;"

TABLE_TO_WRITE = '<TABLE NAME>'

# READ TABLE
remote_table = (spark.read
                .format("jdbc")
                .option("url", url)
                .option("dbtable", TABLE_TO_WRITE)
                .option("user", user)
                .option("password", password)
                .load())

# READ QUERY
query = '<SQL QUERY>'   # query string pushed down to SQL Server
remote_table = (spark.read
                .format("jdbc")
                .option("url", url)
                .option("query", query)
                .option("user", user)
                .option("password", password)
                .load())

# WRITE TABLE (df is the DataFrame to be written)
(df.distinct().write.format("jdbc")
    .mode("overwrite")
    .option("truncate", "true")
    .option("url", url)
    .option("dbtable", TABLE_TO_WRITE)
    .option("user", user)
    .option("password", password)
    .option("schemaCheckEnabled", "false")
    .save())
ref: https://docs.databricks.com/external-data/sql-server.html
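For consistency with the MySQL and PostgreSQL sections, the same read pattern can be wrapped in a helper function. This is only a sketch, assuming url, user and password have already been set as in the configuration cell above; the function name read_query_sqlserver and the example query are hypothetical.

Code Block
# Helper mirroring read_table_mysql / query_postgres, for SQL Server
def read_query_sqlserver(sql_query):
    try:
        df = (spark.read.format("jdbc")
              .option("url", url)
              .option("query", sql_query)
              .option("user", user)
              .option("password", password)
              .load())
        return df
    except Exception as ex:
        print("❌ Error reading the query")
        print(ex)

# Hypothetical example call
df_labels = read_query_sqlserver("SELECT TOP 100 * FROM dbo.labels")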