当前位置 : 主页 > 编程语言 > python >

Greenplum Python工具库gpload学习——get_fast_match_exttable_query和get_reuse_exttable_query

来源:互联网 收集:自由互联 发布时间:2022-08-10
查询可重复利用的外部表,只指定location、data format和error limit相同 # Fast path to find out whether we have an existing external table in the # catalog which could be reused for this operation. we only make sure the # lo


查询可重复利用的外部表,只指定location、data format和error limit相同

# Fast path to find out whether we have an existing external table in the
# catalog which could be reused for this operation. we only make sure the
# location, data format and error limit are same. we don't check column
# names and types
#
# This function will return the SQL to run in order to find out whether
# such a table exists. The results of this SQl are table names without schema
#
def get_fast_match_exttable_query(self, formatType, formatOpts, limitStr, schemaName, log_errors, encodingCode):

sqlFormat = """select relname from pg_class
join
pg_exttable pgext
on(pg_class.oid = pgext.reloid)
%s
where
relstorage = 'x' and
relname like 'ext_gpload_reusable_%%' and
%s
"""

joinStr = ""
conditionStr = ""

# if schemaName is None, find the resuable ext table which is visible to
# current search path. Else find the resuable ext table under the specific
# schema, and this needs to join pg_namespace.
if schemaName is None:
joinStr = ""
conditionStr = "pg_table_is_visible(pg_class.oid)"
else:
joinStr = """join
pg_namespace pgns
on(pg_class.relnamespace = pgns.oid)"""
conditionStr = "pgns.nspname = '%s'" % schemaName

sql = sqlFormat % (joinStr, conditionStr)

if self.gpdb_version < "6.0.0":
if log_errors:
sql += " and pgext.fmterrtbl = pgext.reloid "
else:
sql += " and pgext.fmterrtbl IS NULL "
else:
if log_errors:
sql += " and pgext.logerrors "
else:
sql += " and NOT pgext.logerrors "

for i, l in enumerate(self.locations):
sql += " and pgext.urilocation[%s] = %s\n" % (i + 1, quote(l))

sql+= """and pgext.fmttype = %s
and pgext.writable = false
and pgext.fmtopts like %s """ % (quote(formatType[0]),quote("%" + quote_unident(formatOpts.rstrip()) +"%"))

if limitStr:
sql += "and pgext.rejectlimit = %s " % limitStr
else:
sql += "and pgext.rejectlimit IS NULL "

if encodingCode:
sql += "and pgext.encoding = %s " % encodingCode

sql+= "limit 1;"

self.log(self.DEBUG, "query used to fast match external relations:\n %s" % sql)
return

查询可重复利用的外部表,指定column names and types, the same data format, and location specification, and single row error handling specs相同

def get_reuse_exttable_query(self, formatType, formatOpts, limitStr, from_cols, schemaName, log_errors, encodingCode):
sqlFormat = """select attrelid::regclass
from (
select
attrelid,
row_number() over (partition by attrelid order by attnum) as attord,
attnum,
attname,
atttypid::regtype
from
pg_attribute
join
pg_class
on (pg_class.oid = attrelid)
%s
where
relstorage = 'x' and
relname like 'ext_gpload_reusable_%%' and
attnum > 0 and
not attisdropped and %s
) pgattr
join
pg_exttable pgext
on(pgattr.attrelid = pgext.reloid)
"""
joinStr = ""
conditionStr = ""

# if schemaName is None, find the resuable ext table which is visible to
# current search path. Else find the resuable ext table under the specific
# schema, and this needs to join pg_namespace.
if schemaName is None:
joinStr = ""
conditionStr = "pg_table_is_visible(pg_class.oid)"
else:
joinStr = """join
pg_namespace pgns
on(pg_class.relnamespace = pgns.oid)
"""
conditionStr = "pgns.nspname = '%s'" % schemaName

sql = sqlFormat % (joinStr, conditionStr)

if self.gpdb_version < "6.0.0":
if log_errors:
sql += " WHERE pgext.fmterrtbl = pgext.reloid "
else:
sql += " WHERE pgext.fmterrtbl IS NULL "
else:
if log_errors:
sql += " WHERE pgext.logerrors "
else:
sql += " WHERE NOT pgext.logerrors "

for i, l in enumerate(self.locations):
sql += " and pgext.urilocation[%s] = %s\n" % (i + 1, quote(l))

sql+= """and pgext.fmttype = %s
and pgext.writable = false
and pgext.fmtopts like %s """ % (quote(formatType[0]),quote("%" + quote_unident(formatOpts.rstrip()) +"%"))

if limitStr:
sql += "and pgext.rejectlimit = %s " % limitStr
else:
sql += "and pgext.rejectlimit IS NULL "

if encodingCode:
sql += "and pgext.encoding = %s " % encodingCode

sql+= "group by attrelid "

sql+= """having
count(*) = %s and
bool_and(case """ % len(from_cols)

for i, c in enumerate(from_cols):
name = c[0]
typ = c[1]
sql+= "when attord = %s then atttypid = %s::regtype and attname = %s\n" % (i+1, quote(typ), quote(quote_unident(name)))

sql+= """else true
end)
limit 1;"""

self.log(self.DEBUG, "query used to identify reusable external relations: %s" % sql)
return


网友评论