package mi.hdm.filesystem; import mi.hdm.recipes.Ingredient; import mi.hdm.recipes.Measurement; import mi.hdm.recipes.NutritionTable; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.nio.file.Path; import java.util.ArrayList; import java.util.InvalidPropertiesFormatException; import java.util.List; public class CSVParser { private static final Logger log = LogManager.getLogger(CSVParser.class); /** * @param filepath Path where the CSV file is located * @param split The character that the columns are split by * @param extract Names of the columns to extract with extract[0] being the name of the ingredient * @return list of all ingredients inside the csv */ public List<Ingredient> getIngredientsFromCSV(String filepath, char split, String... extract) throws IOException { log.info("Trying to read ingredients from CSV: {}", filepath); //try-block with automatic resource management try (BufferedReader reader = new BufferedReader(new FileReader(filepath))) { final String header = reader.readLine(); //read first line of CSV final int[] indexes = getColumnIndexes(header, split, extract); //Loop through lines final List<Ingredient> ingredients = reader.lines() .map(line -> getIngredientFromLine(line, split, indexes)) .toList(); if (ingredients.size() > 0) { log.info("Found and parsed {} ingredients.", ingredients.size()); } else { log.warn("No ingredients found in CSV, returning empty list."); } return ingredients; } catch (FileNotFoundException fileNotFoundException) { final Path filename = Path.of(filepath).getFileName(); log.error("File '{}' not found at specified filepath: {}.", filename, filepath); fileNotFoundException.printStackTrace(); throw fileNotFoundException; } catch (IOException io) { log.error("An error occurred while reading CSV: {}", filepath); throw io; } } private Ingredient getIngredientFromLine(String line, char split, int[] idx) throws NumberFormatException { log.debug("Trying to parse line {}", line); final Measurement measurement = Measurement.GRAM; final List<String> splitLine = splitLine(line, split); final String recipeName = splitLine.get(idx[0]); final List<Double> nutrition = new ArrayList<>(); for (int i = 1; i < idx.length; i++) { String element = splitLine.get(idx[i]).split(" ")[0]; double quantity = parseNumberFromString(element); if (getMeasurementFromString(element).equals("mg")) { quantity = quantity /1000; } nutrition.add(quantity); } final NutritionTable nutritionTable = new NutritionTable(nutrition); return new Ingredient(measurement, recipeName, nutritionTable); } private int[] getColumnIndexes(String header, char splitChar, String... extract) throws InvalidPropertiesFormatException { final String[] split = header.split(String.valueOf(splitChar)); //split header of CSV //find indexes of columns that need to be extracted int[] colIndexes = new int[extract.length]; for (int i = 0; i < extract.length; i++) { int idx = firstOccurrenceOf(split, extract[i]); if (idx < 0) { log.error("The specified column '{}' does not exist in the provided CSV file.", extract[i]); throw new InvalidPropertiesFormatException(String.format("Column %s does not exist in this CSV file.%n", extract[i])); } colIndexes[i] = idx; } return colIndexes; } /** * Returns the index of the first occurence of the specified element in the array * * @param arr The array that should be searched through * @param element The element that should be identified in the array * @return the index of the first occurence of the specified element in the array, or -1 if it does not exist in the array */ private <T> int firstOccurrenceOf(T[] arr, T element) { for (int i = 0; i < arr.length; i++) { if (arr[i].equals(element)) return i; } return -1; } /** * Get only the quantity of a nutrition value without its measurement * * @param candidate the nutrition value the quantity is extracted from * @return quantity of nutrition value as double (that is without measurement) */ private double parseNumberFromString(String candidate) { if (candidate.isBlank()) return 0.0; StringBuilder numValue = new StringBuilder(); boolean hasDot = false; for (int i = 0; i < candidate.length(); i++) { char c = candidate.charAt(i); if (Character.isDigit(c)) { numValue.append(c); } else if (!hasDot && numValue.length() > 0 && c == '.') { hasDot = true; numValue.append(c); } else { break; } } return Double.parseDouble(numValue.toString()); } /** * Get only the measurement of a nutrition value * * @param candidate the nutrition value the measurement is extracted from * @return measurement of nutrition value as String */ private String getMeasurementFromString(String candidate) { if (candidate.isBlank()) return ""; StringBuilder unit = new StringBuilder(); for (int i = 0; i < candidate.length(); i++) { char c = candidate.charAt(i); if (!Character.isDigit(c) && !(c == ' ')) { unit.append(c); } } return unit.toString(); } /** * Split a line of CSV in its individual tokens based on a character while accounting for strings inside of columns * * @return list of tokens inside the provided line */ private List<String> splitLine(String line, char splitChar) { final List<String> output = new ArrayList<>(); StringBuilder builder = new StringBuilder(); boolean inQuotes = false; for (int i = 0; i < line.length(); i++) { final char c = line.charAt(i); if (c == '"') { inQuotes = !inQuotes; } else if (c == splitChar && !inQuotes) { output.add(builder.toString()); builder = new StringBuilder(); } else { builder.append(c); } } //when end is reached, dump the rest contents of string builder to the list (if not empty) if (!builder.isEmpty()) { output.add(builder.toString()); } return output; } }