From 26eb568323791c3575c153bc1f71cecac99c0911 Mon Sep 17 00:00:00 2001 From: Neil Cook Date: Thu, 17 Oct 2024 08:22:17 -0400 Subject: [PATCH] [APERO] possible fix for duplicates in v0.7.288-stable-test (taken from v0.7.290) Issues #788, #786, #782 --- .../tools/module/database/manage_databases.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/apero/tools/module/database/manage_databases.py b/apero/tools/module/database/manage_databases.py index 739f9cb18..2e8685868 100644 --- a/apero/tools/module/database/manage_databases.py +++ b/apero/tools/module/database/manage_databases.py @@ -686,6 +686,10 @@ def get_object_database(params: ParamDict, log: bool = True) -> Table: if len(_table) != 0: # make sure we have the object name column if gl_objcol in _table.colnames: + # we can't keep duplicates in the _table drop them and keep most + # recent (lowest in list) + _table = _drop_duplicates(_table, gl_objcol) + # create a mask of valies not in the main table pmask = ~np.in1d(_table[gl_objcol], maintable[gl_objcol]) # add new columns to main table maintable = vstack([maintable, _table[pmask]]) @@ -1007,6 +1011,23 @@ def _force_column_dtypes(table: Table, coltype: Dict[str, type]) -> Table: return table +def _drop_duplicates(table: Table, column: str, keep: str = 'last'): + """ + Drop duplicates in an astropy table + + :param table: astropy table + :param column: column that is unique + :param keep: str, value to keep ('last', 'first') + :return: + """ + # convert table to dataframe + df = table.to_pandas() + # remove duplicates + df = df.drop_duplicates(subset=column, keep=keep) + # convert back to astropy table + return Table.from_pandas(df) + + # ============================================================================= # Start of code # =============================================================================