I am trying to create a methodology for passing parameters automatically through something like locals(), similarly to how f-strings work.
How it currently works
import pandas as pd
def my_func(conn, string_id, date, integer_ids):
    """Query TABLE with every filter value written directly into the SQL text.

    ``string_id`` is wrapped in single quotes, ``date`` is inserted as-is and
    ``integer_ids`` is rendered as a comma-separated list inside ``in (...)``.
    The finished statement is handed to ``pd.read_sql`` and its result is
    returned.

    NOTE(review): the values are interpolated without any escaping, so this
    form is only suitable for trusted input.
    """
    # Render the ids once, up front, so the template below stays readable.
    id_list = ','.join(str(one_id) for one_id in integer_ids)
    query = f"""
select * from TABLE a
where STRING_ID = '{string_id}'
and DATE = {date}
and INTEGER_ID in ({id_list})"""
    return pd.read_sql(query, conn)
However, this approach means I cannot copy-paste the SQL into SQL developer or similar, and run it from there. So I would like an approach that makes use of parameters instead.
There seem to be two problems with that:
locals()
How I would like it to work is something like the example below (which obviously doesn't work):
import pandas as pd
def my_func(conn, string_id, date, integer_ids):
    """Desired form: named ``:placeholders`` filled from the local variables.

    The statement text contains no Python formatting; ``locals()`` is passed
    to ``pd.read_sql`` as ``params`` so that each placeholder is looked up by
    the name of the matching argument.
    """
    # The local must stay named ``sql``: it is part of the mapping that
    # locals() produces below.
    sql = """
select * from TABLE
where STRING_ID = :string_id
and DATE = :date
and INTEGER_ID in :integer_ids"""
    # At this point locals() holds conn, string_id, date, integer_ids and sql.
    return pd.read_sql(sql, conn, params=locals())
EDIT: Thanks to perl, I now have a working solution to my problem
def read_sql(sql, conn, params):
    """Run *sql* through pandas, expanding list/tuple parameters for ``IN``.

    Every ``:name`` placeholder whose value in *params* is a list or tuple is
    rewritten to ``(:name_000, :name_001, ...)`` and the individual elements
    are bound under those generated names. All other placeholders are left
    untouched.

    Args:
        sql: Statement text using ``:name`` placeholders.
        conn: Connection or engine passed on to ``pd.read_sql``.
        params: Mapping of placeholder name to value. It is not modified.

    Returns:
        The result of ``pd.read_sql`` for the expanded statement.
    """
    # Work on a copy so the caller's mapping is left exactly as it was.
    bound = dict(params)
    # dict.fromkeys de-duplicates while keeping first-seen order, so a
    # placeholder that occurs several times is expanded only once.
    for name in dict.fromkeys(re.findall(r':(\w+)', sql)):
        values = params.get(name)
        if not isinstance(values, (tuple, list)):
            continue
        expanded = {f'{name}_{i:03d}': value for i, value in enumerate(values)}
        placeholder = f"(:{', :'.join(expanded)})"
        # \b keeps ":ids" from also matching the front of ":ids_extra" (or of
        # an already generated ":ids_000"); the callable replacement avoids
        # any backslash processing of the replacement text.
        sql = re.sub(rf':{re.escape(name)}\b', lambda _match: placeholder, sql)
        bound.update(expanded)
    return pd.read_sql(sqlalchemy.text(sql), conn, params=bound)
def my_func(conn, string_id, date, integer_ids):
    """Query TABLE through ``read_sql``, binding placeholders from the locals.

    The statement uses ``:name`` placeholders named after the arguments, and
    ``locals()`` is passed as the parameter mapping.
    """
    # The local must stay named ``sql``: it is part of the mapping that
    # locals() produces below.
    sql = """
select * from TABLE
where STRING_ID = :string_id
and DATE = :date
and INTEGER_ID in :integer_ids"""
    # At this point locals() holds conn, string_id, date, integer_ids and sql.
    return read_sql(sql, conn, locals())
EDIT2: For anyone finding this question, I have since extended the solution a bit to handle cases where lists longer than 1000 elements are passed:
def generate_sql(sql: str, params: dict = None, param_key: str = ':', *,
                 max_sql_params: int = 1000) -> List[Tuple[sqlalchemy.text, dict]]:
    """Expand sequence parameters and split oversized ones into several statements.

    Each placeholder (``param_key`` followed by a name) whose value in
    *params* is a tuple, list or ``np.ndarray`` is rewritten to
    ``(:name_000, :name_001, ...)`` with one generated bind name per element.
    When a sequence has more than *max_sql_params* elements, the elements
    beyond that limit are handled by a recursive call, so the result contains
    one statement per chunk.

    Args:
        sql: Statement text containing the placeholders.
        params: Mapping of placeholder name to value. It is copied, never
            modified. ``None`` means no parameters.
        param_key: Prefix that marks a placeholder in *sql*. The generated
            placeholders are always written with ``:``.
        max_sql_params: Largest number of elements expanded into a single
            statement for one sequence parameter.

    Returns:
        A list of ``(statement, params)`` pairs, where ``statement`` is the
        expanded text wrapped in ``sqlalchemy.text``. Pairs for the later
        chunks of a sequence come before the pair for its first chunk.

    Raises:
        ValueError: If *max_sql_params* is smaller than 1 (chunking could
            never finish).
    """
    if max_sql_params < 1:
        raise ValueError("max_sql_params must be at least 1")

    # Copy so that update() below never touches the caller's dict.
    params = {} if params is None else dict(params)
    key = re.escape(param_key)
    out = []
    # dict.fromkeys de-duplicates the names while keeping a deterministic,
    # first-seen order.
    for name in dict.fromkeys(re.findall(rf"{key}(\w+)", sql)):
        values = params.get(name)
        if not isinstance(values, (tuple, list, np.ndarray)):
            continue
        if len(values) > max_sql_params:
            # Everything past the limit is expanded by a recursive call on
            # the statement as it stands right now.
            remainder = dict(params)
            remainder[name] = values[max_sql_params:]
            out.extend(generate_sql(sql=sql, params=remainder, param_key=param_key,
                                    max_sql_params=max_sql_params))
        extra_params = {f"{name}_{i:03d}": value
                        for i, value in enumerate(values[:max_sql_params])}
        placeholder = f"(:{', :'.join(extra_params)})"
        # Match on param_key (not a hard-coded ":"), and use \b so a name
        # that is a prefix of another placeholder does not rewrite that one.
        sql = re.sub(rf"{key}{name}\b", lambda _match: placeholder, sql)
        params.update(extra_params)
    out.append((sqlalchemy.text(sql), params))
    return out
def read_sql(sql: str, conn: sqlalchemy.engine, params: dict = None) -> pd.DataFrame:
    """Run every statement produced by ``generate_sql`` and stack the results.

    ``generate_sql`` yields ``(statement, params)`` pairs; each pair is
    executed with ``pd.read_sql`` and the resulting frames are concatenated
    in that order.
    """
    frames = []
    for statement, bound in generate_sql(sql=sql, params=params):
        frames.append(pd.read_sql(sql=statement, con=conn, params=bound))
    return pd.concat(frames)
You can use parametrized queries by wrapping the query in sqlalchemy.text and converting lists to tuples. For example:
def my_func(conn, min_number, letters):
    """Select rows from ``letters`` using named bind parameters.

    ``letters`` is converted to a tuple, the statement is wrapped in
    ``sqlalchemy.text`` and ``locals()`` is passed as the parameter mapping.
    """
    # Rebind as a tuple before it is used as the :letters value.
    letters = tuple(letters)
    # The local must stay named ``sql``: it is part of the mapping that
    # locals() produces below.
    sql = sqlalchemy.text("""
SELECT *
FROM letters
WHERE
number >= :min_number AND
letter in :letters""")
    # At this point locals() holds conn, min_number, letters and sql.
    return pd.read_sql(sql, conn, params=locals())
my_func(conn, 10, ['a', 'b', 'c', 'x', 'y', 'z'])
Output:
letter number
0 x 23
1 y 24
2 z 25
For completeness of the example, the following was used as a test table:
# Test table: one row per lowercase ASCII letter, numbered from 0.
df = pd.DataFrame(
    {
        'letter': [*string.ascii_lowercase],
        'number': range(len(string.ascii_lowercase)),
    }
)
# Written to the database as table 'letters', without the index column.
df.to_sql(name='letters', con=conn, index=False)
Update: Here's a possible workaround for Oracle to make it work with lists:
def get_query(sql, **kwargs):
    """Replace ``:name`` placeholders with a quoted literal list.

    For every keyword argument, the placeholder ``:<keyword>`` in *sql* is
    replaced with ``('value1','value2',...)`` built from the given values.
    Every value is converted with ``str`` and enclosed in single quotes;
    single quotes inside a value are doubled so the literal stays well formed.

    Args:
        sql: Statement text containing ``:name`` placeholders.
        **kwargs: Placeholder name mapped to an iterable of values.

    Returns:
        The statement text with the named placeholders replaced. Placeholders
        that are not named in *kwargs* are left as they are.
    """
    import re

    for name, values in kwargs.items():
        literals = "','".join(str(value).replace("'", "''") for value in values)
        in_list = f"('{literals}')"
        # \b keeps ":letters" from also rewriting the front of a longer
        # placeholder such as ":letters_old"; the callable replacement avoids
        # backslash processing of the inserted text.
        sql = re.sub(rf":{re.escape(name)}\b", lambda _match: in_list, sql)
    return sql
def my_func(conn, min_number, letters):
    """Select rows from ``letters``, inlining the list and binding the rest.

    The ``:letters`` placeholder is replaced with a literal list by
    ``get_query``; the resulting text is wrapped in ``sqlalchemy.text`` and
    ``locals()`` is passed as the parameter mapping.
    """
    # The locals must stay named ``sql_template`` and ``sql``: both are part
    # of the mapping that locals() produces below.
    sql_template = """
SELECT *
FROM letters
WHERE
number >= :min_number AND
letter in :letters
"""
    # List variables go to get_query as named parameters, which swaps each
    # placeholder for ('value1', 'value2', ..., 'valueN').
    sql = sqlalchemy.text(get_query(sql_template, letters=letters))
    return pd.read_sql(sql, conn, params=locals())
my_func(conn, 10, ['a', 'b', 'c', 'x', 'y', 'z'])
Update 2: Here's the get_query function that works with both strings and numbers (enclosing strings in quotes, but not numbers):
def get_query(sql, **kwargs):
    """Replace ``:name`` placeholders with a literal list of values.

    For every keyword argument, the placeholder ``:<keyword>`` in *sql* is
    replaced with ``(v1,v2,...)``. Numbers are written as-is; every other
    value is formatted as text and enclosed in single quotes, with embedded
    single quotes doubled so the literal stays well formed.

    Args:
        sql: Statement text containing ``:name`` placeholders.
        **kwargs: Placeholder name mapped to an iterable of values.

    Returns:
        The statement text with the named placeholders replaced. Placeholders
        that are not named in *kwargs* are left as they are.
    """
    import numbers
    import re

    def literal(value):
        # Numbers stay bare; numbers.Number also covers numeric types that
        # are not plain int/float (for example Decimal).
        if isinstance(value, numbers.Number):
            return f'{value}'
        escaped = f'{value}'.replace("'", "''")
        return f"'{escaped}'"

    for name, values in kwargs.items():
        in_list = f"({','.join(literal(value) for value in values)})"
        # \b keeps ":ids" from also rewriting the front of ":ids_2"; the
        # callable replacement avoids backslash processing of the text.
        sql = re.sub(rf':{re.escape(name)}\b', lambda _match: in_list, sql)
    return sql
For example:
# Template with two list placeholders, :numbers and :letters.
sql = """
SELECT *
FROM letters
WHERE
number in :numbers AND
letters in :letters
"""
# Each keyword argument names the placeholder that its values replace.
get_query(sql,
numbers=[1, 2, 3],
letters=['A', 'B', 'C'])
Output:
SELECT *
FROM letters
WHERE
number in (1,2,3) AND
letters in ('A','B','C')
Why not this:
import pandas as pd
def my_func(conn, string_id, date, integer_ids):
    """Query RISK using positional ``%s`` placeholders.

    Args:
        conn: Connection passed on to ``pd.read_sql``.
        string_id: Value bound to the ``STRING_ID`` filter.
        date: Value bound to the ``DATE`` filter.
        integer_ids: Values bound to the ``INTEGER_ID in`` filter.

    Returns:
        The result of ``pd.read_sql`` for the statement.
    """
    sql = """
select * from RISK
where STRING_ID = %s
and DATE = %s
and INTEGER_ID in %s"""
    # The values must be passed with the ``params`` keyword: the third
    # positional argument of pd.read_sql is ``index_col``, so passing them
    # positionally never binds them to the statement.
    # NOTE(review): integer_ids is sent as a tuple on the assumption that the
    # driver renders a tuple as a parenthesised list for IN -- confirm for
    # the driver in use.
    return pd.read_sql(sql, conn, params=(string_id, date, tuple(integer_ids)))
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With