diff --git a/.gitignore b/.gitignore index 443e658..ef79918 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ key.txt ui/apiKeys.py ui/__pycache__/ +*__pycache__* +.DS_Store +crash_info.log + +.idea diff --git a/collBook.py b/collBook.py index cc2b2a6..1b66869 100644 --- a/collBook.py +++ b/collBook.py @@ -21,7 +21,7 @@ """ __author__ = "Caleb Powell, Jacob Motley" -__credits__ = ["Caleb Powell, Jacob Motley, Joey Shaw"] +__credits__ = ["Caleb Powell, Jacob Motley, Dax Ledesma, Joey Shaw"] __email__ = "calebadampowell@gmail.com" __status__ = "Alpha" __version__ = 'v0.1.8-alpha' diff --git a/ui/importindexdialog.py b/ui/importindexdialog.py index 03d387b..98d50eb 100644 --- a/ui/importindexdialog.py +++ b/ui/importindexdialog.py @@ -16,15 +16,15 @@ class importDialog(QDialog): the necessary index fields are missing from a csv file being loaded. Called in pandastablemodel, under the open_CSV function""" - def __init__(self, parent=None, df=False): + def __init__(self, parent=None, df=False, inat=False): super().__init__() - self.init_ui(parent, df) + self.init_ui(parent, df, inat) - def init_ui(self, parent, df): + def init_ui(self, parent, df, inat): if isinstance(df, pd.DataFrame): self.parent = parent # this is the master window importDialog = Ui_importIndexDialog() - importDialog.setupUi(self) + importDialog.setupUi(self, inat) self.df = df # populate the qcombo boxes for box in [importDialog.value_Existing_Specimen_Numbers, @@ -54,8 +54,14 @@ def indexAssignments(self): # generate sequential specimenNumbers numSeq = [str(x+1) for x in range(len(self.df))] self.df['specimenNumber'] = numSeq + useExistingSite = self.importDialog.value_Use_Existing_Site_Numbers - if useExistingSite.isChecked(): + useOneSite = self.importDialog.value_Use_One_Site + + if useOneSite.isChecked(): + numSeq = [str(1) for _ in range(len(self.df))] + self.df['siteNumber'] = numSeq + elif useExistingSite.isChecked(): # generate sequential siteNumbers existingCol = self.importDialog.value_Existing_Site_Numbers.currentText() self.df['siteNumber'] = self.df[existingCol] diff --git a/ui/importindexdialogUI.py b/ui/importindexdialogUI.py index e005099..6247d62 100644 --- a/ui/importindexdialogUI.py +++ b/ui/importindexdialogUI.py @@ -9,7 +9,7 @@ from PyQt5 import QtCore, QtGui, QtWidgets class Ui_importIndexDialog(object): - def setupUi(self, importIndexDialog): + def setupUi(self, importIndexDialog, inat): importIndexDialog.setObjectName("importIndexDialog") importIndexDialog.resize(411, 396) font = QtGui.QFont() @@ -36,9 +36,19 @@ def setupUi(self, importIndexDialog): self.horizontalLayout_2.addWidget(self.value_Existing_Site_Numbers) self.formLayout.setLayout(0, QtWidgets.QFormLayout.SpanningRole, self.horizontalLayout_2) self.value_Gen_Site_Numbers = QtWidgets.QRadioButton(self.groupBox) - self.value_Gen_Site_Numbers.setChecked(True) + if inat: + self.value_Gen_Site_Numbers.setChecked(False) + else: + self.value_Gen_Site_Numbers.setChecked(True) self.value_Gen_Site_Numbers.setObjectName("value_Gen_Site_Numbers") self.formLayout.setWidget(1, QtWidgets.QFormLayout.LabelRole, self.value_Gen_Site_Numbers) + self.value_Use_One_Site = QtWidgets.QRadioButton(self.groupBox) + if inat: + self.value_Use_One_Site.setChecked(True) + else: + self.value_Use_One_Site.setChecked(False) + self.value_Use_One_Site.setObjectName("value_Use_One_Site") + self.formLayout.setWidget(2, QtWidgets.QFormLayout.LabelRole, self.value_Use_One_Site) self.gridLayout.addWidget(self.groupBox, 6, 0, 1, 1) self.horizontalLayout_3 = QtWidgets.QHBoxLayout() self.horizontalLayout_3.setObjectName("horizontalLayout_3") @@ -80,20 +90,25 @@ def setupUi(self, importIndexDialog): spacerItem2 = QtWidgets.QSpacerItem(20, 40, QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Expanding) self.gridLayout.addItem(spacerItem2, 4, 0, 1, 1) - self.retranslateUi(importIndexDialog) - self.value_Use_Existing_Specimen_Numbers.toggled['bool'].connect(self.value_Existing_Specimen_Numbers.setEnabled) + self.retranslateUi(importIndexDialog, inat) self.value_Use_Existing_Site_Numbers.toggled['bool'].connect(self.value_Existing_Site_Numbers.setEnabled) + self.value_Use_Existing_Specimen_Numbers.toggled['bool'].connect(self.value_Existing_Specimen_Numbers.setEnabled) QtCore.QMetaObject.connectSlotsByName(importIndexDialog) - def retranslateUi(self, importIndexDialog): + def retranslateUi(self, importIndexDialog, inat): _translate = QtCore.QCoreApplication.translate importIndexDialog.setWindowTitle(_translate("importIndexDialog", "Form")) self.groupBox.setTitle(_translate("importIndexDialog", "Site Number")) self.value_Use_Existing_Site_Numbers.setText(_translate("importIndexDialog", "Pick from existing columns")) self.value_Gen_Site_Numbers.setText(_translate("importIndexDialog", "Generate unique site numbers")) + self.value_Use_One_Site.setText(_translate("importIndexDialog", "Treat all imported records as one site (recommended for iNaturalist-like files)")) self.pushButton_Cancel.setText(_translate("importIndexDialog", "Cancel")) self.pushButton_Assign.setText(_translate("importIndexDialog", "Assign")) - self.label.setText(_translate("importIndexDialog", "Could not locate indexing fields (ie: siteNumber, specimenNumber, or otherCatalogNumbers).")) + if inat: + self.label.setText(_translate("importIndexDialog", "Importing iNaturalist file.")) + else: + self.label.setText(_translate("importIndexDialog", + "Could not locate indexing fields (ie: siteNumber, specimenNumber, or otherCatalogNumbers).")) self.label_2.setText(_translate("importIndexDialog", "Select how to assign index fields.")) self.groupBox_2.setTitle(_translate("importIndexDialog", "Specimen Numbers")) self.value_Use_Existing_Specimen_Numbers.setText(_translate("importIndexDialog", "Pick from existing columns")) diff --git a/ui/locality.py b/ui/locality.py index 29257cc..b326ee1 100644 --- a/ui/locality.py +++ b/ui/locality.py @@ -50,6 +50,7 @@ def userAsk(self, text): def reverseGeoCall(self, latitude, longitude): apiUrl = f'https://maps.googleapis.com/maps/api/geocode/json?latlng={str(latitude)},{str(longitude)}&key={self.gAPIkey}' + print(apiUrl) try: apiCall = requests.get(apiUrl) except ConnectionError: @@ -132,8 +133,12 @@ def genLocality(self, currentRowArg): country = addressComponent['short_name'] newLocality['country'] = country currentRowArg['country'] = country + if addressComponent['types'][0] == 'natural_feature': + country = addressComponent['natural_feature'] + newLocality['natural_feature'] = country + currentRowArg['natural_feature'] = country # construct the locality items with a controlled order - localityList = ['country','stateProvince','county','municipality','park','path'] + localityList = ['country','stateProvince','county','municipality','natural_feature','park','path'] localityItemList = [] for item in localityList: newLocalityItem = newLocality.get(item, False) diff --git a/ui/pandastablemodel.py b/ui/pandastablemodel.py index 19858b6..45fb08b 100644 --- a/ui/pandastablemodel.py +++ b/ui/pandastablemodel.py @@ -21,8 +21,9 @@ import pandas as pd import numpy as np + class PandasTableModel(QtCore.QAbstractTableModel): - def __init__(self, parent=None, editable = True, *args): + def __init__(self, parent=None, editable=True, *args): super(PandasTableModel, self).__init__(parent) self.parent = parent self.datatable = None # what the user is seeing & interacting with @@ -34,7 +35,7 @@ def __init__(self, parent=None, editable = True, *args): def addToUndoList(self, description='undo the last major action'): """ to be called just before a change is made to the underlaying df """ selection = self.parent.getTreeSelectionType() - df = self.datatable # save the details into a checkpoint + df = self.datatable # save the details into a checkpoint checkPoint = (df.copy(deep=True), selection, description) self.undoList.append(checkPoint) self.redoList = [] # if we're adding to undoList, clear redoList @@ -48,7 +49,7 @@ def redo(self): try: checkpoint = self.redoList.pop() except IndexError: - checkpoint = (None, (None, None, None), 'the last major action') + checkpoint = (None, (None, None, None), 'the last major action') df, sel, msg = checkpoint if isinstance(df, pd.DataFrame): self.datatable = df @@ -80,22 +81,22 @@ def updateUndoRedoButtons(self): """ called if the self.undoIndex changes. Updates the hint text of the undo, & redo buttons to reflect the description appended in addToUndoList""" - if len(self.undoList) > 0 : + if len(self.undoList) > 0: self.parent.w.action_undo.setEnabled(True) - msg = self.undoList[-1][-1]#.replace('redo: ', 'undo: ') + msg = self.undoList[-1][-1] # .replace('redo: ', 'undo: ') msg = f'undo: {msg}' else: self.parent.w.action_undo.setEnabled(False) msg = 'undo the last major action' self.parent.w.action_undo.setToolTip(msg) - if len(self.redoList) > 0 : + if len(self.redoList) > 0: self.parent.w.action_redo.setEnabled(True) - msg = self.redoList[-1][-1]#.replace('undo', 'redo') + msg = self.redoList[-1][-1] # .replace('undo', 'redo') msg = f'redo: {msg}' else: self.parent.w.action_redo.setEnabled(False) - msg = 'redo the last major action' + msg = 'redo the last major action' self.parent.w.action_redo.setToolTip(msg) def update(self, dataIn): @@ -103,36 +104,36 @@ def update(self, dataIn): self.datatable = dataIn self.endResetModel() # let display elements know about the change (ie: qTreeWidget) - self.dataChanged.emit(QtCore.QModelIndex(),QtCore.QModelIndex() , (QtCore.Qt.DisplayRole, )) + self.dataChanged.emit(QtCore.QModelIndex(), QtCore.QModelIndex(), (QtCore.Qt.DisplayRole,)) def addNewSite(self): """ adds a new, nearly blank site record to the dataTable """ df = self.datatable try: - newSiteNum = max(pd.to_numeric(df['siteNumber'], errors = 'coerce')) + 1 + newSiteNum = max(pd.to_numeric(df['siteNumber'], errors='coerce')) + 1 except ValueError: newSiteNum = 1 self.addToUndoList(f'added site {newSiteNum}') # set checkpoint in undostack - rowData = {'recordNumber':f'{newSiteNum}-#', - 'siteNumber':f'{newSiteNum}', - 'specimenNumber':'#', - 'associatedTaxa':''} + rowData = {'recordNumber': f'{newSiteNum}-#', + 'siteNumber': f'{newSiteNum}', + 'specimenNumber': '#', + 'associatedTaxa': ''} defVals = self.parent.form_view.readDefaultNewSiteFields() rowData.update(defVals) # be sure to clear associatedTaxa # TODO determine why it otherwise copies associatedTaxa from other site - df = df.append(rowData, ignore_index=True, sort=False) - df.fillna('', inplace = True) + df = df.append(rowData, ignore_index=True, sort=False) + df.fillna('', inplace=True) self.update(df) self.parent.populateTreeWidget() # change tree_widget's selection to the to new site. self.parent.selectTreeWidgetItemByName(f'Site {newSiteNum}(0)') - + def addNewSpecimen(self): """ adds a new specimen record to selected site """ df = self.datatable selType, siteNum, specimenNum = self.parent.getTreeSelectionType() - if selType in ['site','specimen']: + if selType in ['site', 'specimen']: try: # try to make the new row data spNums = df[df['specimenNumber'] != '#']['specimenNumber'] newSpNum = max(pd.to_numeric(spNums, errors='coerce')) + 1 @@ -141,7 +142,7 @@ def addNewSpecimen(self): newRowData = df[(df['siteNumber'] == siteNum) & (df['specimenNumber'] == '#')].copy() newRowData['specimenNumber'] = f'{newSpNum}' - catNum = f'{siteNum}-{newSpNum}' + catNum = f'{siteNum}-{newSpNum}' self.addToUndoList(f'added specimen {catNum}') # set checkpoint in undostack newRowData['recordNumber'] = catNum df = df.append(newRowData, ignore_index=True, sort=False) @@ -167,7 +168,7 @@ def duplicateSpecimen(self): newRowData = df[(df['siteNumber'] == siteNum) & (df['specimenNumber'] == specimenNum)].copy() newRowData['specimenNumber'] = f'{newSpNum}' - catNum = f'{siteNum}-{newSpNum}' + catNum = f'{siteNum}-{newSpNum}' newRowData['recordNumber'] = catNum df = df.append(newRowData, ignore_index=True, sort=False) df = self.sortDF(df) @@ -176,7 +177,7 @@ def duplicateSpecimen(self): self.parent.populateTreeWidget() # change tree_widget's selection to the to new specimen. self.parent.selectTreeWidgetItemByName(catNum) - + def deleteSite(self): """ called from the delete site button """ df = self.datatable @@ -206,20 +207,20 @@ def deleteSpecimen(self): # change tree_widget's selection to All Records. self.parent.w.checkBox_deleteRecord.setCheckState(Qt.Unchecked) self.parent.selectTreeWidgetItemByName(f'Site ({siteNum})') - self.parent.expandCurrentTreeWidgetItem() # re-expand the site selection + self.parent.expandCurrentTreeWidgetItem() # re-expand the site selection def rowCount(self, parent=QtCore.QModelIndex()): return len(self.datatable.index) def columnCount(self, parent=QtCore.QModelIndex()): return len(self.datatable.columns.values) - + def columnIndex(self, colName): """ given a column name, returns the index of it's location. Called by updateTableView to get the index of "specimenNumber" for sorting.""" result = self.datatable.columns.get_loc(colName) return result - + def retrieveRowData(self, i): """ given a row index number returns the data as a series """ df = self.datatable @@ -229,25 +230,26 @@ def getSelectedLabelDict(self, df): """Returns a list of dictionaries from given df organized as {column name: cell value}.""" df = df.fillna('') - if isinstance(df, pd.Series): # not sure if a series or df will be handed in + if isinstance(df, pd.Series): # not sure if a series or df will be handed in data = [df.to_dict()] else: data = df.to_dict(orient='records') labelDicts = [] for datum in data: - #datum = {key: value.strip() for key, value in datum.items() if isinstance(value,str)} #dict comprehension! - datum = {key: value for key, value in datum.items() if isinstance(value,str)} # strip command was preventing spaces from being entered - #if datum.get('specimenNumber') not in ['#','!AddSITE']: #keep out the site level records! + # datum = {key: value.strip() for key, value in datum.items() if isinstance(value,str)} #dict comprehension! + datum = {key: value for key, value in datum.items() if + isinstance(value, str)} # strip command was preventing spaces from being entered + # if datum.get('specimenNumber') not in ['#','!AddSITE']: #keep out the site level records! labelDicts.append(datum) return labelDicts - def geoRef(self): + def geoRef(self, selType): """ applies genLocality over each row among those selected. Combines api calls for records from the same site.""" # Needs modified If editing site data at specimen level records is re-enabled. self.addToUndoList('geolocate process') # set checkpoint in undostack - selType, siteNum, specimenNum = self.parent.getTreeSelectionType() + _, siteNum, specimenNum = self.parent.getTreeSelectionType() if selType == 'site': # hacky method to get only site level record (catalogNumber: "n-#") rowsToProcess = self.getRowsToProcess('specimen', siteNum, '#') @@ -255,13 +257,15 @@ def geoRef(self): elif selType == 'allRec': records = self.getSiteSpecimens() # less hacky method to get every site level record - rowsToProcess = [x for x,y in records if y == '#'] + rowsToProcess = [idx for idx, record in enumerate(records) if record[1] != '#'] sitesToUpdate = rowsToProcess else: rowsToProcess = self.getRowsToProcess(selType, siteNum, specimenNum) sitesToUpdate = [] - self.processViewableRecords(rowsToProcess, self.parent.locality.genLocality) - self.inheritGeoRefFields(sitesToUpdate) # send site data down stream. + + print(rowsToProcess) + self.processViewableRecords(rowsToProcess, self.parent.locality.genLocality) + self.inheritGeoRefFields(sitesToUpdate) # send site data down stream. def inheritGeoRefFields(self, sitesToUpdate): """ passess all geoReference fields from sites to children records """ @@ -277,9 +281,9 @@ def inheritGeoRefFields(self, sitesToUpdate): df[col] = "" for site in sitesToUpdate: - #df.loc[df['Col1'].isnull(),['Col1','Col2', 'Col3']] = replace_with_this.values - newVals = df.loc[(df['siteNumber'] == site) & (df['specimenNumber'] == '#')][geoRefCols] - df.loc[(df['siteNumber']== site) & (df['specimenNumber'] != '#'), geoRefCols] = newVals.values.tolist() + # df.loc[df['Col1'].isnull(),['Col1','Col2', 'Col3']] = replace_with_this.values + newVals = df.loc[(df['siteNumber'] == site) & (df['specimenNumber'] != '#')][geoRefCols] + df.loc[(df['siteNumber'] == site) & (df['specimenNumber'] != '#'), geoRefCols] = newVals.values.tolist() QApplication.processEvents() self.datatable.update(df) self.update(self.datatable) @@ -287,7 +291,7 @@ def inheritGeoRefFields(self, sitesToUpdate): def assignCatalogNumbers(self): """If appropriate assigns catalogNumbers over each visible row.""" - #TODO consider checking SERNEC for those Catalog Number's existance + # TODO consider checking SERNEC for those Catalog Number's existance # IE: http://sernecportal.org/portal/collections/list.php?db=311&catnum=UCHT012345%20-%20UCHT0123555&othercatnum=1 # Could webscrape a requests return from something like: http://sernecportal.org/portal/collections/list.php?db=311&catnum=UCHT999900%20-%20UCHT999991&othercatnum=1 # the checkbox for enabling the "Assign catalog numbers" group box. @@ -297,19 +301,19 @@ def assignCatalogNumbers(self): if assign: rowsToProcess = self.getRowsToProcess(*self.parent.getTreeSelectionType()) - dfOrig = self.datatable.iloc[rowsToProcess, ] + dfOrig = self.datatable.iloc[rowsToProcess,] try: - #be sure to only assign catalog numbers to empty fields - df = dfOrig.loc[(dfOrig['specimenNumber'].str.isdigit()) & - (dfOrig['catalogNumber'] == '')].copy() + # be sure to only assign catalog numbers to empty fields + df = dfOrig.loc[(dfOrig['specimenNumber'].str.isdigit()) & + (dfOrig['catalogNumber'] == '')].copy() except KeyError: # address a no catalogNumber condition df = dfOrig.loc[dfOrig['specimenNumber'].str.isdigit()].copy() df['catalogNumber'] = '' if len(df) > 0: - #check what type of catalogNumbers to assign + # check what type of catalogNumbers to assign newCatNums = [] - if uuidCat: # if generating them on the fly... - #uuid possible alphabet = "23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" + if uuidCat: # if generating them on the fly... + # uuid possible alphabet = "23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" # trunicated to 13 symbols long requires approx 35,040,000,000 specimens for a 1% chance of collision for _ in range(len(df.index)): trunicated_uuid = uuid()[:13] @@ -318,7 +322,7 @@ def assignCatalogNumbers(self): self.datatable.update(df) self.update(self.datatable) self.parent.updateTableView() - elif patCat: # otherwise use pattern + elif patCat: # otherwise use pattern catStartingNum = int(self.parent.settings.get('value_catalogNumberStartingNum')) catDigits = int(self.parent.settings.get('value_catalogNumberDigits')) catPrefix = self.parent.settings.get('value_catalogNumberPrefix') @@ -326,8 +330,9 @@ def assignCatalogNumbers(self): for i in range(len(df.index)): newCatNum = f'{catPrefix}{str(catStartingNum).zfill(catDigits)}' newCatNums.append(newCatNum) - catStartingNum += 1 # add 1 to the starting catNumber - answer = self.parent.userAsk(f'Assign catalog numbers: {newCatNums[0]} - {newCatNums[-1]} ?', 'Assigning Catalog Numbers') + catStartingNum += 1 # add 1 to the starting catNumber + answer = self.parent.userAsk(f'Assign catalog numbers: {newCatNums[0]} - {newCatNums[-1]} ?', + 'Assigning Catalog Numbers') if answer is True: # if the user agreed to assign the catalog numbers df['catalogNumber'] = newCatNums self.datatable.update(df) @@ -336,17 +341,21 @@ def assignCatalogNumbers(self): self.parent.settings.updateStartingCatalogNumber(catStartingNum) # after adding catnums pull in results and check for uniqueness # TODO Clean this function up! It is pretty awful looking.. - df = self.datatable.iloc[rowsToProcess, ] - dfUnique = df.loc[(df['specimenNumber'].str.isdigit()) & - (df['catalogNumber'] != '')].copy() + df = self.datatable.iloc[rowsToProcess,] + dfUnique = df.loc[(df['specimenNumber'].str.isdigit()) & + (df['catalogNumber'] != '')].copy() if not dfUnique['catalogNumber'].is_unique: # check for duplicated catalog numbers - dfNonUnique = dfUnique[dfUnique.duplicated(subset=['catalogNumber'],keep='first')].copy() # keep the first one as "unique" + dfNonUnique = dfUnique[dfUnique.duplicated(subset=['catalogNumber'], + keep='first')].copy() # keep the first one as "unique" newCatNums = [] - for i in range(len(dfNonUnique.index)): # generate a range of additional new catNums to apply to non-uniques + for i in range( + len(dfNonUnique.index)): # generate a range of additional new catNums to apply to non-uniques newCatNum = f'{catPrefix}{str(catStartingNum).zfill(catDigits)}' newCatNums.append(newCatNum) catStartingNum += 1 - answer = self.parent.userAsk(f'Duplicate catalog numbers found! Assign additional {newCatNums[0]} - {newCatNums[-1]} ? Selecting "NO" will keep the duplicate catalog numbers as they are.', 'Assigning Catalog Numbers') + answer = self.parent.userAsk( + f'Duplicate catalog numbers found! Assign additional {newCatNums[0]} - {newCatNums[-1]} ? Selecting "NO" will keep the duplicate catalog numbers as they are.', + 'Assigning Catalog Numbers') if answer is True: # if the user agreed to assign the catalog numbers dfNonUnique['catalogNumber'] = newCatNums self.datatable.update(dfNonUnique) @@ -368,11 +377,12 @@ def verifyAllButton(self): # TODO find logical point in workflow to clean associatedTaxa. selType, siteNum, specimenNum = self.parent.getTreeSelectionType() xButton = self.parent.statusBar.pushButton_Cancel - xButton.setEnabled(True) + xButton.setEnabled(True) if selType in ['site', 'specimen']: sites = [siteNum] else: # it is probably 'allRec'. - sites = [x for x,y in self.getSiteSpecimens() if y == '#'] + sites = set([x for x, y in self.getSiteSpecimens() if y != '#']) + for site in sites: # enforce a site-by-site workflow QApplication.processEvents() if xButton.status: # check for cancel button @@ -384,14 +394,14 @@ def verifyAllButton(self): self.verifyTaxButton() if xButton.status: # check for cancel button break - self.geoRef() + self.geoRef(selType) if xButton.status: # check for cancel button break # check user policy for associatedTaxa dialog if self.parent.settings.get('value_associatedAlways', True): self.associatedTaxDialog() elif self.parent.settings.get('value_associatedOnly', False): - records = self.getRowsToKeep('site', siteNum = site) + records = self.getRowsToKeep('site', siteNum=site) if len(records) > 2: self.associatedTaxDialog() elif self.parent.settings.get('value_associatedNever', False): @@ -399,7 +409,7 @@ def verifyAllButton(self): pass else: self.associatedTaxDialog() - + if xButton.status: # check for cancel button break QApplication.processEvents() @@ -407,7 +417,7 @@ def verifyAllButton(self): xButton.setEnabled(False) xButton.status = False self.parent.setTreeSelectionByType(selType, siteNum, specimenNum) # return the selection - + def associatedTaxDialog(self): """ displays the associatedTaxa dialog and waits for user input """ waitingForUser = QtCore.QEventLoop() @@ -415,26 +425,26 @@ def associatedTaxDialog(self): self.parent.associatedTaxaWindow.associatedMainWin.button_cancel.clicked.connect(waitingForUser.quit) # update associatedTaxa options before calling the input window. self.parent.associatedTaxaWindow.populateAssociatedTaxa() - self.parent.toggleAssociated() # call user input window and wait + self.parent.toggleAssociated() # call user input window and wait waitingForUser.exec_() def processViewableRecords(self, rowsToProcess, func): """ applies a function over each row among rowsToProcess (by index)""" - #self.parent.statusBar.label_status + # self.parent.statusBar.label_status xButton = self.parent.statusBar.pushButton_Cancel df = self.datatable.loc[rowsToProcess] totRows = len(df) pb = self.parent.statusBar.progressBar pb.setMinimum(0) pb.setMaximum(totRows) - for c,i in enumerate(rowsToProcess): + for c, i in enumerate(rowsToProcess): QApplication.processEvents() if xButton.status: # check for cancel button break rowData = df.loc[i] df.loc[i] = func(rowData) pb.setValue(c + 1) - #msg = (f'{c + 1} of {totRows}') + # msg = (f'{c + 1} of {totRows}') df.update(rowData) self.datatable.update(df) self.update(self.datatable) @@ -442,15 +452,14 @@ def processViewableRecords(self, rowsToProcess, func): self.parent.form_view.fillFormFields() pb.setValue(0) - - def getRowsToProcess(self, selType, siteNum = None, specimenNum = None): + def getRowsToProcess(self, selType, siteNum=None, specimenNum=None): """ defined for clarity, calls getRowsToKeep with the same args.""" return self.getRowsToKeep(selType, siteNum, specimenNum) - def getRowsToKeep(self, selType, siteNum = None, specimenNum = None): + def getRowsToKeep(self, selType, siteNum=None, specimenNum=None): """ Returns list of row indices associated with inputs """ df = self.datatable - #df = df[~df['specimenNumber'].str.contains('#')] + # df = df[~df['specimenNumber'].str.contains('#')] if selType == 'site': rowsToKeep = df[df['siteNumber'] == siteNum].index.values.tolist() elif selType == 'specimen': @@ -458,8 +467,8 @@ def getRowsToKeep(self, selType, siteNum = None, specimenNum = None): else: # otherwise, keep everything (usually "allRec") rowsToKeep = df.index.values.tolist() return rowsToKeep - - def getRowsToHide(self, selType, siteNum = None, specimenNum = None): + + def getRowsToHide(self, selType, siteNum=None, specimenNum=None): """ Returns list of row indicies NOT associated with input options called from mainWindow's updateTableView() following tree_widget selection changes.""" @@ -500,7 +509,7 @@ def setData(self, index, value, role=None): j = index.column() self.datatable.iloc[i, j] = value # this emission causes real time edits to appear on previewPDF window - self.dataChanged.emit(index, index, (QtCore.Qt.DisplayRole, )) + self.dataChanged.emit(index, index, (QtCore.Qt.DisplayRole,)) return True return False @@ -512,28 +521,38 @@ def headerData(self, p_int, Qt_Orientation, role=None): return self.datatable.index[p_int] return QtCore.QVariant() - def open_CSV(self, fileName = None): + def open_CSV(self, fileName=None): # is triggered by the action_Open. fileName, _ = QtWidgets.QFileDialog.getOpenFileName(None, "Open CSV", QtCore.QDir.homePath(), "CSV (*.csv)") + _translate = QtCore.QCoreApplication.translate + self.parent.setWindowTitle(_translate("MainWindow", f"collBook ({fileName})")) if fileName: # if a csv was selected, start loading the data. + inat = False + col_num = False try: - df = pd.read_csv(fileName, encoding = 'utf-8',keep_default_na=False, dtype=str) - df = df.drop(df.columns[df.columns.str.contains('unnamed',case = False)],axis = 1) # drop any "unnamed" cols + df = pd.read_csv(fileName, encoding='utf-8', keep_default_na=False, dtype=str) + df = df.drop(df.columns[df.columns.str.contains('unnamed', case=False)], + axis=1) # drop any "unnamed" cols # check if input is an iNaturalist export + if "colNum" in df.columns: + col_num = True + try: # if so, parse those cols. if df['url'].str.lower().str.contains('inaturalist.org').any(): df = self.convertiNatFormat(df) + inat = True except KeyError: # probably not an iNaturalist export. pass + cols = df.columns # a list of cols which indicates the data may be from CollectR colectoRCols = ['Collector', 'Additional collectors', - 'Number', 'Infracategory', 'Herbarium Acronym', - 'Complete Herb. Name 1', 'Complete Herb. Name 2', - 'Project'] + 'Number', 'Infracategory', 'Herbarium Acronym', + 'Complete Herb. Name 1', 'Complete Herb. Name 2', + 'Project'] # check if input is a CollectoR export. if all(x in cols for x in colectoRCols): # if so, parse those cols. @@ -548,8 +567,8 @@ def open_CSV(self, fileName = None): wasAssigned = False # store if assignments were made. if not all(x in cols for x in ['siteNumber', 'specimenNumber']): if 'recordNumber' not in cols: - assignedDF, dialogStatus = self.getIndexAssignments(df) - # if the accept button (titled 'Assign') was pressed, assign df + assignedDF, dialogStatus = self.getIndexAssignments(df, inat) + # if the accept button (titled 'Assign') was pressed, assign df if dialogStatus == QDialog.Accepted: df = assignedDF cols = df.columns @@ -590,28 +609,31 @@ def open_CSV(self, fileName = None): df['newAssociatedCollectors'] = expandedCols[1] # clean up the | which may be left over df['newAssociatedCollectors'] = df.loc[df['newAssociatedCollectors'] - .str.contains('|')]['newAssociatedCollectors'].str.replace('|', ', ').str.replace(' ', ' ') + .str.contains('|')]['newAssociatedCollectors'].str.replace('|', ', ').str.replace(' ', ' ') # combine the two fields into one - df['newAssociatedCollectors'] = df['associatedCollectors'].str.split(', ') + df['newAssociatedCollectors'].str.split(', ') + df['newAssociatedCollectors'] = df['associatedCollectors'].str.split(', ') + df[ + 'newAssociatedCollectors'].str.split(', ') # In nearly all cases, this will preserve the name order. - df['associatedCollectors'] = df['newAssociatedCollectors'].apply(lambda x: ', '.join([y.strip() for y in pd.unique(x) if y != ''])) + df['associatedCollectors'] = df['newAssociatedCollectors'].apply( + lambda x: ', '.join([y.strip() for y in pd.unique(x) if y != ''])) # drop the 'newAssociatedCollectors' col. df.drop(columns=['newAssociatedCollectors'], inplace=True) cols = df.columns # after parsing the cols, round elevation values to reasonable floating point if 'minimumElevationInMeters' in df.columns: # complicated lambda deals with rounding values of unknown data types - df['minimumElevationInMeters'] = df['minimumElevationInMeters'].apply(lambda x: round(x, 1) if isinstance(x, (int, float)) else x).astype(str) + df['minimumElevationInMeters'] = df['minimumElevationInMeters'].apply( + lambda x: round(x, 1) if isinstance(x, (int, float)) else x).astype(str) self.update(df) # this function updates the visible dataframe self.parent.populateTreeWidget() self.parent.form_view.fillFormFields() return True # generalized exception, may be risky but is broad. except Exception as e: - title = 'Error loading records' + title = 'Error loading records' text = ' Failed to load the selected records. ' details = f'{e}' - self.parent.userNotice(text, title=title,detailText=details, + self.parent.userNotice(text, title=title, detailText=details, inclHalt=False) def save_CSV(self, fileName=False, df=None): @@ -623,21 +645,21 @@ def save_CSV(self, fileName=False, df=None): # convert empty strings to null values df.replace('', np.nan, inplace=True) df.dropna(axis=1, how='all', inplace=True) - readyToSave=False + readyToSave = False if not fileName: fileName, _ = QtWidgets.QFileDialog.getSaveFileName( - None, "Save CSV", QtCore.QDir.homePath(), "CSV (*.csv)") + None, "Save CSV", QtCore.QDir.homePath(), "CSV (*.csv)") if fileName: # if a csv was selected, start loading the data. if Path(fileName).suffix == '': fileName = f'{fileName}.csv' - readyToSave=True + readyToSave = True if Path(fileName).is_file(): - readyToSave=False + readyToSave = False message = f'File named: "{fileName}" already exist! OVERWRITE this file?' title = 'Save As' answer = self.parent.userAsk(message, title) if answer: - readyToSave=True + readyToSave = True else: readyToSave = True if readyToSave: @@ -658,7 +680,7 @@ def export_CSV(self, fileName=None, df=None): df.dropna(axis=1, how='all', inplace=True) if fileName is None: fileName, _ = QtWidgets.QFileDialog.getSaveFileName( - None, "Save CSV", QtCore.QDir.homePath(), "CSV (*.csv)") + None, "Save CSV", QtCore.QDir.homePath(), "CSV (*.csv)") if fileName: # if a csv was selected, start loading the data. drop_col_Names = ['siteNumber', 'specimenNumber'] keep_col_Names = [x for x in df.columns if x not in drop_col_Names] @@ -779,18 +801,17 @@ def verifySiteRecordsExist(self, df): # add results to the list of rows to be added newRow['specimenNumber'] = '#' newRow['siteNumber'] = siteNum - newRows.append(newRow) # if we have any rows to be added to the df, do so if len(newRows) > 0: df = df.append(newRows, ignore_index=True, sort=False) - + pb.setValue(0) - return df + return df - def getIndexAssignments(self, df): + def getIndexAssignments(self, df, inat): """ calls the dialog defined in importindexdialog.py and retrieves or generates the user defined siteNumber & specimenNumber columns.""" - dialog = importDialog(self, df) + dialog = importDialog(self, df, inat) result = dialog.exec_() resultDF = dialog.indexAssignments() # return the assignedDF and boolean if the dialog status was accepted. @@ -801,28 +822,33 @@ def sortDF(self, df): Expects the dataframe to have siteNumber and specimenNumber columns """ # first try and organize 2 temporary "sort on" columns try: - df['sortSpecimen'] = df['specimenNumber'].str.replace('#','0').astype('int64') - df['sortSite'] = df['siteNumber'].str.replace('','0').astype('int64') + df['sortSpecimen'] = df['specimenNumber'].str.replace('#', '0').astype('int64') + df['sortSite'] = df['siteNumber'].str.replace('', '0').astype('int64') # fill any nans with 0s before sorting on these cols fillValues = {'sortSpecimen': 0, 'sortSite': 0} - df = df.fillna(value=fillValues) + df = df.fillna(value=fillValues) except: return False df.sort_values(by=['sortSite', 'sortSpecimen'], inplace=True, ascending=True) - df.drop(columns = ['sortSite', 'sortSpecimen'], inplace = True) - df.reset_index(drop = True, inplace = True) + df.drop(columns=['sortSite', 'sortSpecimen'], inplace=True) + df.reset_index(drop=True, inplace=True) return df def inferrecordNumber(self, rowData): """ assigns recordNumber based on siteNumber & specimenNumber """ try: - rowData['recordNumber'] = f"{rowData['siteNumber']}-{rowData['specimenNumber']}" - except IndexError: - pass - return rowData + rowData['recordNumber'] = f"{rowData['colNum']}" + return rowData + except: + try: + rowData['recordNumber'] = f"{rowData['siteNumber']}-{rowData['specimenNumber']}" + except IndexError: + pass + return rowData def inferSiteSpecimenNumbers(self, df): """ attempts to infer a siteNumber and specimenNumber of an incoming df """ + def specimenNumExtract(catNum): try: result = catNum.split('-')[1] @@ -842,6 +868,7 @@ def siteNumExtract(catNum): return '' except (ValueError, IndexError, AttributeError) as e: return '' + try: df['siteNumber'] = df['recordNumber'].transform(lambda x: siteNumExtract(x)) df['specimenNumber'] = df['recordNumber'].transform(lambda x: specimenNumExtract(x)) @@ -854,16 +881,16 @@ def siteNumExtract(catNum): def convertiNatFormat(self, df): """converts iNaturalist formatted data into a compatable DWC format. This does not infer site numbers.""" - + colNames = df.columns # private_latitude,private_longitude may be used in place if coordinates_obscured == 'true' - if ('coordinates_obscured' in colNames & - df['coordinates_obscured'] == 'true'): + if ('coordinates_obscured' in colNames & + df['coordinates_obscured'] == 'true'): # for key value in following dict. where keys are private coordinate columns for k, v in { - 'private_latitude': 'latitude', - 'private_longitude': 'longitude', - 'private_positional_accuracy': 'positional_accuracy'}.items(): + 'private_latitude': 'latitude', + 'private_longitude': 'longitude', + 'private_positional_accuracy': 'positional_accuracy'}.items(): # essentially, if the private coordinate fields are filled, pull them into the non-private fields # this allows those fields to be treated similarly in the future. if df[k] != '': @@ -872,19 +899,19 @@ def convertiNatFormat(self, df): # TODO add localitySecurity consideration for this condition # if it was indicated as cultivated in iNat, convert results ahead of renaming. if ('captive_cultivated' in colNames & - df['captive_cultivated'] == 'true'): + df['captive_cultivated'] == 'true'): df['captive_cultivated'] = 'cultivated' colNameMap = { - "observed_on": "eventDate", - "url": "associatedMedia", - "latitude": "decimalLatitude", - "longitude": "decimalLongitude", - "positional_accuracy": "coordinateUncertaintyInMeters", - "scientific_name": "scientificName", - "description": "occurrenceRemarks", - "captive_cultivated": "establishmentMeans" - } + "observed_on": "eventDate", + "url": "occurrenceRemarks", + "latitude": "decimalLatitude", + "longitude": "decimalLongitude", + "positional_accuracy": "coordinateUncertaintyInMeters", + "scientific_name": "scientificName", + "captive_cultivated": "establishmentMeans", + "user_login": "recordedBy" + } df.rename(colNameMap, axis='columns', inplace=True) return df @@ -894,12 +921,12 @@ def convertColectoRFormat(self, df): This does not infer site numbers. For details on ColectoR see: Maya-Lastra, C.A. 2016, doi:10.3732/apps.1600035 """ # create eventDate from existing cols - df['eventDate'] = ['-'.join([x,y,z]) for x, y, z in zip(df['Year'], df['Month'], df['Day'])] + df['eventDate'] = ['-'.join([x, y, z]) for x, y, z in zip(df['Year'], df['Month'], df['Day'])] # strip non-numerics out of GPS accuracy value - df['GPS Accuracy'] = df['GPS Accuracy'].str.replace('± ','').str.replace(' m','') + df['GPS Accuracy'] = df['GPS Accuracy'].str.replace('± ', '').str.replace(' m', '') # join multiple terms into scientificName # get all terms into a list - taxonTerms = df[['Genus','Species','Infracategory','InfraTaxa']].add(' ').fillna('').values.tolist() + taxonTerms = df[['Genus', 'Species', 'Infracategory', 'InfraTaxa']].add(' ').fillna('').values.tolist() # replace empty spaces to NaN sciNames = pd.Series([''.join(x).strip(' ') for x in taxonTerms]).replace('^$', np.nan, regex=True) # replace NaN to '' @@ -907,14 +934,14 @@ def convertColectoRFormat(self, df): df['scientificName'] = sciNames # join multiple terms into occurrenceRemarks # get all terms into a list - notesTerms = df[['Description','Notes','Additional_notes']].add(' ').fillna('').values.tolist() + notesTerms = df[['Description', 'Notes', 'Additional_notes']].add(' ').fillna('').values.tolist() # replace empty spaces to NaN occNotes = pd.Series([''.join(x).strip(' ') for x in notesTerms]).replace('^$', np.nan, regex=True) # replace NaN to '' occNotes = occNotes.where(occNotes.notnull(), '') df['occurrenceRemarks'] = occNotes # copy Number, so when choosing index names "Number" is still present. - numSeq = [f"-{x+1}" for x in range(len(df))] + numSeq = [f"-{x + 1}" for x in range(len(df))] df['recordNumber'] = df['Number'] + numSeq # nullify "Project" named if data appears to be a private variable df.loc[df["Project"] == "_toProyecto_", "Project"] = "" @@ -929,10 +956,10 @@ def convertColectoRFormat(self, df): 'GPS Accuracy': 'coordinateUncertaintyInMeters', 'Altitude': 'minimumElevationInMeters', 'Project': 'Label Project'} - df.rename(colNameMap, axis='columns', inplace=True) + df.rename(colNameMap, axis='columns', inplace=True) return df - def new_Records(self, skipDialog = False): + def new_Records(self, skipDialog=False): # is triggered by the action_new_Records. """Clears all the data and makes a new table if skipDialog is True, it won't ask.""" @@ -940,64 +967,64 @@ def new_Records(self, skipDialog = False): if skipDialog: ret = QMessageBox.Yes else: - ret = qm.question(self.parent, '', 'Load a blank data set? (any unsaved progress will be lost)', qm.Yes | qm.No) + ret = qm.question(self.parent, '', 'Load a blank data set? (any unsaved progress will be lost)', + qm.Yes | qm.No) if ret == qm.Yes: if not skipDialog: self.addToUndoList(f'loaded new, blank site data') # set checkpoint in undostack newDFDict = { - 'siteNumber':['1','1'], - 'specimenNumber':['#','1'], - 'recordNumber':['1-#','1-1'], - 'catalogNumber':['',''], - 'family':['',''], - 'scientificName':['',''], - 'genus':['',''], - 'scientificNameAuthorship':['',''], - 'taxonRemarks':['',''], - 'identifiedBy':['',''], - 'dateIdentified':['',''], - 'identificationReferences':['',''], - 'identificationRemarks':['',''], - 'collector':['',''], - 'collectorNumber':['',''], - 'associatedCollectors':['',''], - 'eventDate':['',''], - 'verbatimEventDate':['',''], - 'habitat':['',''], - 'substrate':['',''], - 'occurrenceRemarks':['',''], - 'informationWithheld':['',''], - 'associatedOccurrences':['',''], - 'dataGeneralizations':['',''], - 'associatedTaxa':['',''], - 'dynamicProperties':['',''], - 'description':['',''], - 'reproductiveCondition':['',''], - 'cultivationStatus':['',''], - 'establishmentMeans':['',''], - 'lifeStage':['',''], - 'sex':['',''], - 'individualCount':['',''], - 'country':['',''], - 'stateProvince':['',''], - 'county':['',''], - 'municipality':['',''], - 'path':['',''], - 'locality':['',''], - 'localitySecurity':['',''], - 'decimalLatitude':['',''], - 'decimalLongitude':['',''], - 'coordinateUncertaintyInMeters':['',''], - 'verbatimCoordinates':['',''], - 'minimumElevationInMeters':['',''], - 'verbatimElevation':['',''], - 'duplicateQuantity':['',''], - 'Label Project':['','']} - + 'siteNumber': ['1', '1'], + 'specimenNumber': ['#', '1'], + 'recordNumber': ['1-#', '1-1'], + 'catalogNumber': ['', ''], + 'family': ['', ''], + 'scientificName': ['', ''], + 'genus': ['', ''], + 'scientificNameAuthorship': ['', ''], + 'taxonRemarks': ['', ''], + 'identifiedBy': ['', ''], + 'dateIdentified': ['', ''], + 'identificationReferences': ['', ''], + 'identificationRemarks': ['', ''], + 'collector': ['', ''], + 'collectorNumber': ['', ''], + 'associatedCollectors': ['', ''], + 'eventDate': ['', ''], + 'verbatimEventDate': ['', ''], + 'habitat': ['', ''], + 'substrate': ['', ''], + 'occurrenceRemarks': ['', ''], + 'informationWithheld': ['', ''], + 'associatedOccurrences': ['', ''], + 'dataGeneralizations': ['', ''], + 'associatedTaxa': ['', ''], + 'dynamicProperties': ['', ''], + 'description': ['', ''], + 'reproductiveCondition': ['', ''], + 'cultivationStatus': ['', ''], + 'establishmentMeans': ['', ''], + 'lifeStage': ['', ''], + 'sex': ['', ''], + 'individualCount': ['', ''], + 'country': ['', ''], + 'stateProvince': ['', ''], + 'county': ['', ''], + 'municipality': ['', ''], + 'path': ['', ''], + 'locality': ['', ''], + 'localitySecurity': ['', ''], + 'decimalLatitude': ['', ''], + 'decimalLongitude': ['', ''], + 'coordinateUncertaintyInMeters': ['', ''], + 'verbatimCoordinates': ['', ''], + 'minimumElevationInMeters': ['', ''], + 'verbatimElevation': ['', ''], + 'duplicateQuantity': ['', ''], + 'Label Project': ['', '']} + df = pd.DataFrame.from_dict(newDFDict) - df.fillna('') # make any nans into empty strings. + df.fillna('') # make any nans into empty strings. self.update(df) # this function actually updates the visible dataframe self.parent.populateTreeWidget() self.parent.form_view.fillFormFields() return - diff --git a/ui/printlabels.py b/ui/printlabels.py index 78974a6..24c0fe6 100644 --- a/ui/printlabels.py +++ b/ui/printlabels.py @@ -270,8 +270,11 @@ def gpsCoordStringer(textfield1,textfield2,textfield3,textfield4,styleKey): if (dfl(textfield1) and dfl(textfield2)): # min([len(dfl(textfield1)),len(dfl(textfield2))]) testing length control. gpsString.append('GPS: ' + dfl(textfield1) + ', ' + dfl(textfield2)) - if dfl(textfield3): - gpsString.append(' ± ' + str(round(float(dfl(textfield3)),0)).split('.')[0] + 'm') + try: + if dfl(textfield3): + gpsString.append(' ± ' + str(round(float(dfl(textfield3)),0)).split('.')[0] + 'm') + except ValueError: + pass if dfl(textfield4): gpsString.append(', Elevation: ' + dfl(textfield4) + 'm') @@ -435,7 +438,7 @@ def dfl(key): # dict lookup helper function [row6_7], [row7]] - #Testing if GPS String can fit on one row with the field number. If not, split them into two rows. + #Testing if GPS String can fit on one row with the Collection Number. If not, split them into two rows. gpsStrElement = gpsCoordStringer('decimalLatitude', 'decimalLongitude', 'coordinateUncertaintyInMeters', 'minimumElevationInMeters','rightSTYSmall') try: gpsStrElement.wrap(self.xPaperSize * .98 , self.yPaperSize * .98) @@ -447,7 +450,7 @@ def dfl(key): # dict lookup helper function gpsParaWidth = 0 if gpsParaWidth > self.xPaperSize * .65: - row8 = Table([[Para('recordNumber','default','Field Number: ')]], style = tableSty) + row8 = Table([[Para('recordNumber','default','Collection Number: ')]], style = tableSty) row9 = Table([[gpsStrElement]],style = tableSty) tableList.append([row8]) @@ -458,7 +461,7 @@ def dfl(key): # dict lookup helper function else: row8 = Table([[ - Para('recordNumber','default','Field Number: '), + Para('recordNumber','default','Collection Number: '), gpsStrElement]], colWidths = (self.xPaperSize * .33, self.xPaperSize * .65), rowHeights = None,style=tableSty) tableList.append([row8]) diff --git a/ui/taxonomy.py b/ui/taxonomy.py index a91da64..28304a6 100644 --- a/ui/taxonomy.py +++ b/ui/taxonomy.py @@ -102,93 +102,100 @@ def verifyTaxonomy(self, rowData): """general method to align taxonomy and retrieve authority. accepts a df row argument, treats it as a dictionary and makes refinements. Returning a the modified argument.""" + for col in rowData.keys(): + rowData[col] = str(rowData[col]) if rowData['scientificName'] in ['', None]: return rowData - # ensure the first word is capitalized regardless - rowData['scientificName'] = rowData['scientificName'].capitalize() - rowNum = f"{rowData['siteNumber']}-{rowData['specimenNumber']}" - scientificName = rowData['scientificName'] - scientificNameAuthorship = rowData['scientificNameAuthorship'].strip() - querySciName = self.normalizeStrInput(scientificName) - # check with the session results before moving on. - sessionResults = self.sessionAlignments.get(querySciName, False) - if sessionResults: - sessionName, sessionAuth, sessionFamily = sessionResults - rowData['scientificName'] = sessionName - rowData['scientificNameAuthorship'] = sessionAuth - rowData['family'] = sessionFamily - return rowData - result = self.retrieveAlignment(querySciName) - if result == (False, False, False): - # if the alignment failed to respond - return rowData - resultSciName, resultAuthor, resultFam = result - # Decide how to handle resulting data - keptResult = False # flag to det if the alignment result was kept - changeAuth = False # flag to determine if the authority needs altered. - if resultSciName is None: # if no scientificName was returned - message = f'No {self.value_Kingdom} results for "{scientificName}" (# {rowNum}) found using {self.TaxAlignSource}.\n This may be a typo, would you like to reenter the name?' - reply = self.parent.userSciNameInput(f'{rowNum}: Taxonomic alignment', message) - if reply: - rowData['scientificName'] = reply - rowData = self.verifyTaxonomy(rowData) - return rowData - # if the returned result is not the scientificName, check policies - if resultSciName.lower() != scientificName.lower(): - if self.NameChangePolicy == 'Accept all suggestions': - rowData['scientificName'] = resultSciName - rowData['family'] = resultFam - changeAuth = True - keptResult = True - elif self.NameChangePolicy == 'Always ask': - message = f'Change {scientificName} to {resultSciName} at record {rowNum}?' - answer = self.parent.userAsk(message, 'Taxonomic alignment') - if answer: + try: + # ensure the first word is capitalized regardless + rowData['scientificName'] = rowData['scientificName'].capitalize() + rowNum = f"{rowData['siteNumber']}-{rowData['specimenNumber']}" + scientificName = rowData['scientificName'] + scientificNameAuthorship = rowData['scientificNameAuthorship'].strip() + querySciName = self.normalizeStrInput(scientificName) + # check with the session results before moving on. + sessionResults = self.sessionAlignments.get(querySciName, False) + if sessionResults: + sessionName, sessionAuth, sessionFamily = sessionResults + rowData['scientificName'] = sessionName + rowData['scientificNameAuthorship'] = sessionAuth + rowData['family'] = sessionFamily + return rowData + + result = self.retrieveAlignment(querySciName) + if result == (False, False, False): + # if the alignment failed to respond + return rowData + resultSciName, resultAuthor, resultFam = result + # Decide how to handle resulting data + keptResult = False # flag to det if the alignment result was kept + changeAuth = False # flag to determine if the authority needs altered. + if resultSciName is None: # if no scientificName was returned + message = f'No {self.value_Kingdom} results for "{scientificName}" (# {rowNum}) found using {self.TaxAlignSource}.\n This may be a typo, would you like to reenter the name?' + reply = self.parent.userSciNameInput(f'{rowNum}: Taxonomic alignment', message) + if reply: + rowData['scientificName'] = reply + rowData = self.verifyTaxonomy(rowData) + return rowData + # if the returned result is not the scientificName, check policies + if resultSciName.lower() != scientificName.lower(): + if self.NameChangePolicy == 'Accept all suggestions': rowData['scientificName'] = resultSciName rowData['family'] = resultFam - keptResult = True changeAuth = True - # the returned result is equal to the scientificName... - else: # treat it as if we kept the returned result - keptResult = True - rowData['family'] = resultFam - if changeAuth: - # if the scientificName changed already, update the author - rowData['scientificNameAuthorship'] = resultAuthor - else: - if not keptResult: - # condition to retrieve authority for potentially non-accepted name - # in favor of simplicity, the family name will not be updated under this condition - resultAuthor = self.retrieveAlignment(querySciName, retrieveAuth=True) - if resultAuthor.lower() not in [scientificNameAuthorship.lower(), None]: - # if the authors don't match check user policies - # conditional actions based on AuthChangePolicy - if self.AuthChangePolicy == 'Accept all suggestions': - rowData['scientificNameAuthorship'] = resultAuthor - elif self.AuthChangePolicy == 'Fill blanks': - if scientificNameAuthorship == '': # if it is blank fill it + keptResult = True + elif self.NameChangePolicy == 'Always ask': + message = f'Change {scientificName} to {resultSciName} at record {rowNum}?' + answer = self.parent.userAsk(message, 'Taxonomic alignment') + if answer: + rowData['scientificName'] = resultSciName + rowData['family'] = resultFam + keptResult = True + changeAuth = True + # the returned result is equal to the scientificName... + else: # treat it as if we kept the returned result + keptResult = True + rowData['family'] = resultFam + if changeAuth: + # if the scientificName changed already, update the author + rowData['scientificNameAuthorship'] = resultAuthor + else: + if not keptResult: + # condition to retrieve authority for potentially non-accepted name + # in favor of simplicity, the family name will not be updated under this condition + resultAuthor = self.retrieveAlignment(querySciName, retrieveAuth=True) + + if resultAuthor.lower() not in [scientificNameAuthorship.lower(), None]: + # if the authors don't match check user policies + # conditional actions based on AuthChangePolicy + if self.AuthChangePolicy == 'Accept all suggestions': rowData['scientificNameAuthorship'] = resultAuthor - else: # if not blank, ask. - message = f'Update author of {rowData["scientificName"]} from:\n{scientificNameAuthorship} to {resultAuthor} at record {rowNum}?' + elif self.AuthChangePolicy == 'Fill blanks': + if scientificNameAuthorship == '': # if it is blank fill it + rowData['scientificNameAuthorship'] = resultAuthor + else: # if not blank, ask. + message = f'Update author of {rowData["scientificName"]} from:\n{scientificNameAuthorship} to {resultAuthor} at record {rowNum}?' + answer = self.parent.userAsk(message, 'Authority alignment') + if answer: + rowData['scientificNameAuthorship'] = resultAuthor + + elif self.AuthChangePolicy == 'Always ask': + if scientificNameAuthorship == '': # custom dialog box if the field was empty. 'Always ask' may be annoying! + message = f'Fill in blank author of {rowData["scientificName"]} to {resultAuthor} at record {rowNum}?' + else: + message = f'Update author of {rowData["scientificName"]} from:\n{scientificNameAuthorship} to {resultAuthor} at record {rowNum}?' answer = self.parent.userAsk(message, 'Authority alignment') if answer: rowData['scientificNameAuthorship'] = resultAuthor - - elif self.AuthChangePolicy == 'Always ask': - if scientificNameAuthorship == '': # custom dialog box if the field was empty. 'Always ask' may be annoying! - message = f'Fill in blank author of {rowData["scientificName"]} to {resultAuthor} at record {rowNum}?' - else: - message = f'Update author of {rowData["scientificName"]} from:\n{scientificNameAuthorship} to {resultAuthor} at record {rowNum}?' - answer = self.parent.userAsk(message, 'Authority alignment') - if answer: - rowData['scientificNameAuthorship'] = resultAuthor - # update sessionAlignments to remember these results for this session - results = (rowData['scientificName'], - rowData['scientificNameAuthorship'], - rowData['family']) - self.sessionAlignments[querySciName] = results + # update sessionAlignments to remember these results for this session + results = (rowData['scientificName'], + rowData['scientificNameAuthorship'], + rowData['family']) + self.sessionAlignments[querySciName] = results + except: + pass return rowData def normalizeStrInput(self, inputStr, retrieveAuth=False):