Skip to content

Commit

Permalink
Can now re-run process without re-parsing the CSV file
Browse files: browse the repository at this point in the history
  • Loading branch information
seab authored and seab committed Apr 13, 2023
1 parent b20a2fe commit 452f4d2
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 77 deletions.
24 changes: 16 additions & 8 deletions src/main/java/fun/seabird/MediaSortCmd.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

public class MediaSortCmd
{
private String mediaPath;
private Path mediaPath;
private Long hrsOffset = 0l;

private Path csvFile;
private boolean reParseCsv=true;

private boolean sepYear = false;
private boolean transcodeVideos = false;
Expand All @@ -27,13 +29,7 @@ public boolean isSepYear() {
}
public void setSepYear(boolean sepYear) {
this.sepYear = sepYear;
}
public String getMediaPath() {
return mediaPath;
}
public void setMediaPath(String mediaPath) {
this.mediaPath = mediaPath;
}
}
/**
 * Returns the CSV file path held by this command.
 *
 * @return the stored {@code csvFile} value; callers in this project check it
 *         for {@code null} before parsing, so it may be unset
 */
public Path getCsvFile() {
return csvFile;
}
Expand Down Expand Up @@ -64,5 +60,17 @@ public boolean isTranscodeVideos() {
/**
 * Stores the {@code transcodeVideos} flag on this command.
 *
 * @param transcodeVideos the new flag value
 *        (NOTE(review): presumably enables transcoding of large video files —
 *        the code that reads this flag is not visible here; confirm)
 */
public void setTranscodeVideos(boolean transcodeVideos) {
this.transcodeVideos = transcodeVideos;
}
/**
 * Returns the media path held by this command.
 *
 * @return the stored {@code mediaPath} value, as last passed to
 *         {@link #setMediaPath(Path)}
 */
public Path getMediaPath() {
return mediaPath;
}
/**
 * Stores the media path on this command.
 *
 * @param mediaPath the path to keep; it is stored as given, with no validation
 */
public void setMediaPath(Path mediaPath) {
this.mediaPath = mediaPath;
}
/**
 * Reports the current value of the {@code reParseCsv} flag.
 *
 * @return {@code true} if the flag is set, {@code false} otherwise
 */
public boolean isReParseCsv() {
return reParseCsv;
}
/**
 * Sets the {@code reParseCsv} flag.
 *
 * @param reParseCsv the new flag value
 */
public void setReParseCsv(boolean reParseCsv) {
this.reParseCsv = reParseCsv;
}

}
91 changes: 55 additions & 36 deletions src/main/java/fun/seabird/MediaSortTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Stream;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.imaging.ImageReadException;
import org.apache.commons.imaging.ImageWriteException;
Expand Down Expand Up @@ -66,24 +68,27 @@ public class MediaSortTask extends Task<Path> {
public static final Set<String> imageExtensions = Set.of("jpg", "jpeg", "png", "crx", "crw", "cr2", "cr3", "crm",
"arw", "nef", "orf", "raf");

static final String OUTPUT_FOLDER_NAME = "ebird";
public static final String OUTPUT_FOLDER_NAME = "ebird";

static final long MAX_ML_UPLOAD_SIZE_VIDEO = 250l;
static final String TRANSCODED_VIDEO_SUFFIX = "_s";
private static final long MAX_ML_UPLOAD_SIZE_VIDEO = 250l;
private static final String TRANSCODED_VIDEO_SUFFIX = "_s";

final String[] invalidChars = new String[] { " ", ":", ",", ".", "/", "\\", ">", "<" };
final String[] validChars = new String[] { "-", "--", "-", "-", "-", "-", "-", "-" };
private static final String[] invalidChars = new String[] { " ", ":", ",", ".", "/", "\\", ">", "<" };
private static final String[] validChars = new String[] { "-", "--", "-", "-", "-", "-", "-", "-" };

private final List<CreationDateProvider> creationDateProviders = List.of(new ExifCreationDateProvider(),
private static final List<CreationDateProvider> creationDateProviders = List.of(new ExifCreationDateProvider(),
new FileNameCreationDateProvider(), new FileModifiedCreationDateProvider());
private final ReadWriteLock rangeMapLock = new ReentrantReadWriteLock();
private final RangeMap<LocalDateTime, String> rangeMap = TreeRangeMap.create();
private final Map<String, SubStats> checklistStatsMap = new ConcurrentSkipListMap<>();

//eBird CSV fields
private static final int CSV_BATCH_SIZE = 50000;
private static final ReadWriteLock rangeMapLock = new ReentrantReadWriteLock();
private static final RangeMap<LocalDateTime, String> rangeMap = TreeRangeMap.create();
private static final Map<String, SubStats> checklistStatsMap = new ConcurrentSkipListMap<>();
private static final AtomicInteger linesProcessed = new AtomicInteger(0);

private final MediaSortCmd msc;

private transient Process process;

private static final int CSV_BATCH_SIZE = 50000;
private transient Process process;

public MediaSortTask(MediaSortCmd msc) {
this.msc = msc;
Expand All @@ -95,9 +100,12 @@ public MediaSortTask(MediaSortCmd msc) {
*
* @param record The CSV record to be parsed.
*/
private void parseCsvLine(CSVRecord record) {
private void parseCsvLine(CSVRecord record)
{
if (record.getRecordNumber() == 1l)
return; // skip the header

linesProcessed.incrementAndGet();

long duration = 0;
String durationStr = record.get(14);
Expand Down Expand Up @@ -141,6 +149,12 @@ private void parseCsvLine(CSVRecord record) {
}
}

/**
* Parses an eBird CSV file using the Apache Commons CSV library and processes each line in parallel.
*
* @param csvFile The path to the CSV file to be parsed.
* @throws IOException If an I/O error occurs while reading the CSV file.
*/
public void parseCsv(Path csvFile) throws IOException {
logger.info("Parsing " + csvFile + "...");

Expand All @@ -155,7 +169,7 @@ public void parseCsv(Path csvFile) throws IOException {
.block();

stopwatch.stop();
logger.info("Parsed eBird data in " + stopwatch);
logger.info("Parsed " + linesProcessed.get() + " eBird observations in " + stopwatch.getTime(TimeUnit.SECONDS) + " seconds");
}
}

Expand Down Expand Up @@ -390,10 +404,17 @@ private void checkMetadataAndMove(Path file, Path outputDir, Long hrsOffset, Set
*/
@Override
protected Path call() throws Exception {
if (msc.getCsvFile() != null)
if (msc.getCsvFile() != null && msc.isReParseCsv())
{
rangeMap.clear();
checklistStatsMap.clear();
linesProcessed.set(0);

parseCsv(msc.getCsvFile());
msc.setReParseCsv(false);
}

Path mediaPath = Path.of(msc.getMediaPath());
Path mediaPath = msc.getMediaPath();

// make output directory inside the provided media folder
String outputDirName = OUTPUT_FOLDER_NAME + "_" + new Date().getTime();
Expand Down Expand Up @@ -456,29 +477,27 @@ protected Path call() throws Exception {
else
Files.move(outputDir, finalOutputDir, StandardCopyOption.ATOMIC_MOVE);
}

Path indexPath = null;
if (!subIds.isEmpty()) {
indexPath = mediaPath.resolve("checklistIndex_" + new Date().getTime() + ".csv");
try (BufferedWriter bw = Files.newBufferedWriter(indexPath, StandardCharsets.UTF_8,
StandardOpenOption.CREATE)) {
bw.write("Checklist Link,Date,State,County,Num Uploaded Assets,Num Local Assets\n");
for (String subId : subIds) {
SubStats ss = checklistStatsMap.get(subId);
bw.write("https://ebird.org/checklist/" + subId + "/media,");
bw.write(ss.getDate() + ",");
bw.write(ss.getSubnational1Code() + ",");
bw.write(ss.getCounty() + ",");
bw.write(ss.getNumAssetsUploaded() + ",");
bw.write(ss.getNumAssetsLocal() + "\n");
}
}
}


Path resultsFile = null;
if (!subIds.isEmpty()) {
resultsFile = mediaPath.resolve("checklistIndex_" + new Date().getTime() + ".csv");
try (BufferedWriter bw = Files.newBufferedWriter(resultsFile, StandardCharsets.UTF_8,StandardOpenOption.CREATE);
CSVPrinter csvPrinter = new CSVPrinter(bw, CSVFormat.DEFAULT.builder().setHeader("Checklist Link", "Date", "State", "County", "Num Uploaded Assets", "Num Local Assets").build()))
{
for (String subId : subIds) {
SubStats ss = checklistStatsMap.get(subId);
csvPrinter.printRecord("https://ebird.org/checklist/" + subId + "/media",ss.getDate(), ss.getSubnational1Code(), ss.getCounty(),ss.getNumAssetsUploaded(), ss.getNumAssetsLocal());
}
csvPrinter.flush();
} catch (IOException e) {
e.printStackTrace();
}
}

updateProgress(1.0, 1.0);

logger.info("ALL DONE! :-)");
return indexPath;
return resultsFile;

}

Expand Down
58 changes: 25 additions & 33 deletions src/main/java/fun/seabird/MediaSorterApplication.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ResourceBundle;

import fun.seabird.MediaSortCmd.FolderGroup;
import javafx.application.Application;
Expand All @@ -27,56 +28,46 @@

public class MediaSorterApplication extends Application
{
private static String titleText = "eBird Media Sorter";
private static final int FRAME_WIDTH = 650;
private static final int FRAME_HEIGHT = 800;

private static int FRAME_WIDTH = 650;
private static int FRAME_HEIGHT = 800;

private static String introText = "Welcome! Choose your media directory, eBird CSV file, options, and press run.";
private static String subDirText = "Create Subdirectory";
private static String browseBtnText = "Choose Media Directory";
private static String resBtnText = "See Results!";
private static String exifAdjText = "EXIF Adjustment (0 hours)";
private static String csvBtnText = "Choose MyEBirdData CSV File";
private static String sepYearText = "Create Parent Folders by Year";
private static String locSortText = "Create Subfolders by Location";
private static String symbLinkText = "Generate Symbolic Links Instead of Moving Files";
private static String runBtnText = "Run";
private static String transcodeVidText = "Transcode large video (.mp4) files";

private ExtensionFilter csvFilter = new ExtensionFilter("CSV Files","*.csv");
private static final String UI_PROPERTIES_FILE_BASE = "ui";

private static final ExtensionFilter csvFilter = new ExtensionFilter("CSV Files","*.csv");

public static final TextArea OUTPUT_LOG = new TextArea();
static final TextArea OUTPUT_LOG = new TextArea();

@Override
public void start(Stage s) throws Exception
{
s.setTitle(titleText);
ResourceBundle msgs = ResourceBundle.getBundle(UI_PROPERTIES_FILE_BASE);

s.setTitle(msgs.getString("titleText"));

final MediaSortCmd msc = new MediaSortCmd();
final MediaSortResult msr = new MediaSortResult();

Label introLbl = new Label(introText);
Label introLbl = new Label(msgs.getString("introText"));

Button browseBut = new Button(browseBtnText);
Button browseBut = new Button(msgs.getString("browseBtnText"));
Label browseButLbl = new Label();

Label offsetLbl = new Label(exifAdjText);
Label offsetLbl = new Label(msgs.getString("exifAdjText"));
Slider offsetSlider = new Slider(-6,6,0);
offsetSlider.setSnapToTicks(true);
offsetSlider.setMajorTickUnit(1);

Button csvBrowse = new Button(csvBtnText);
Button csvBrowse = new Button(msgs.getString("csvBtnText"));
Label csvBrowseLbl = new Label();

CheckBox locSortCb = new CheckBox(locSortText);
CheckBox sepYearDirCb = new CheckBox(sepYearText);
CheckBox parentDirCb = new CheckBox(subDirText);
CheckBox locSortCb = new CheckBox(msgs.getString("locSortText"));
CheckBox sepYearDirCb = new CheckBox(msgs.getString("sepYearText"));
CheckBox parentDirCb = new CheckBox(msgs.getString("subDirText"));
parentDirCb.setSelected(true);
CheckBox symbLinkCb = new CheckBox(symbLinkText);
CheckBox transcodeVidCb = new CheckBox(transcodeVidText);
CheckBox symbLinkCb = new CheckBox(msgs.getString("symbLinkText"));
CheckBox transcodeVidCb = new CheckBox(msgs.getString("transcodeVidText"));

Button runBut = new Button(runBtnText);
Button runBut = new Button(msgs.getString("runBtnText"));
runBut.setDisable(true);

ProgressBar pb = new ProgressBar(0.0);
Expand All @@ -88,7 +79,7 @@ public void start(Stage s) throws Exception
ScrollPane scroll = new ScrollPane (OUTPUT_LOG);
scroll.setVisible(false);

Button resBtn = new Button(resBtnText);
Button resBtn = new Button(msgs.getString("resBtnText"));
resBtn.setDisable(true);
resBtn.setVisible(false);

Expand All @@ -101,10 +92,10 @@ public void start(Stage s) throws Exception
if (selectedFile != null)
{
String path = selectedFile.getPath();
msc.setMediaPath(path);
msc.setMediaPath(selectedFile.toPath());
browseButLbl.setText(path);
runBut.setDisable(false);
parentDirCb.setText(subDirText + " " + msc.getMediaPath() + File.separator + MediaSortTask.OUTPUT_FOLDER_NAME);
parentDirCb.setText(msgs.getString("subDirText") + " " + msc.getMediaPath().resolve(MediaSortTask.OUTPUT_FOLDER_NAME));
}
}
);
Expand All @@ -123,7 +114,7 @@ public void start(Stage s) throws Exception
}
else
{
offsetLbl.setText(exifAdjText);
offsetLbl.setText(msgs.getString("exifAdjText"));
symbLinkCb.setDisable(false);
}

Expand All @@ -140,6 +131,7 @@ public void start(Stage s) throws Exception

if (f != null)
{
msc.setReParseCsv(true);
msc.setCsvFile(f.toPath());
csvBrowseLbl.setText(f.getPath());
}
Expand Down
12 changes: 12 additions & 0 deletions src/main/resources/ui.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
titleText=eBird Media Sorter
introText=Welcome! Choose your media directory, eBird CSV file, options, and press run.
subDirText=Create Subdirectory
browseBtnText=Choose Media Directory
resBtnText=See Results!
exifAdjText=EXIF Adjustment (0 hours)
csvBtnText=Choose MyEBirdData CSV File
sepYearText=Create Parent Folders by Year
locSortText=Create Subfolders by Location
symbLinkText=Generate Symbolic Links Instead of Moving Files
runBtnText=Run
transcodeVidText=Transcode large video (.mp4) files

0 comments on commit 452f4d2

Please sign in to comment.