juan_gandhi: (VP)
[personal profile] juan_gandhi
  /**
   * Checks that a file can be unfolded into multiple columns.
   *
   * <p>Each line of the Chicago weather CSV is stored as one string row, split on commas through
   * an {@code UnfoldingColumn} declared with width 10, and then compared with the original line
   * to make sure no data was lost.
   *
   * @throws IOException if the CSV file cannot be read
   */
  public void testUnfoldingColumn() throws IOException {
    // Load the sample file and read every line of it.
    final File csv = getFile("smalldata/chicago/chicagoAllWeather.csv");
    final List<String> lines = Files.readLines(csv, Charset.defaultCharset());

    // Keep the lines in H2O behind a typed column wrapper
    // (core H2O storage is the type-unaware Vec class).
    final Column<String> source = willDrop(Strings.newColumn(lines));

    // A (virtual) column whose row value is the list of strings produced by splitting on ",".
    final Column<List<String>> split = new UnfoldingColumn<>(Functions.splitBy(","), source, 10);

    // Walk the original lines and compare each with the corresponding unfolded row.
    int row = 0;
    for (String line : lines) {
      // The width (10) is fixed for the whole frame, so shorter rows are padded with nulls;
      // those have to be filtered out before joining the cells back together.
      final List<String> cells = split.apply(row);
      final String actual = StringUtils.join(" ", Predicate.NOT_NULL.filter(cells));

      // The original line with commas turned into spaces is what we expect to get back.
      final String expected = line.replaceAll("\\,", " ").trim();
      assertEquals(expected, actual);
      row++;
    }
  }
This account has disabled anonymous posting.
If you don't have an account you can create one now.
HTML doesn't work in the subject.
More info about formatting

Profile

juan_gandhi: (Default)
Juan-Carlos Gandhi

May 2025

S M T W T F S
    1 2 3
4 5 6 7 8 9 10
11 12 13 14 15 16 17
18 19 20 21 22 23 24
25 26 27 28 29 30 31

Most Popular Tags

Style Credit

Expand Cut Tags

No cut tags
Page generated May. 15th, 2025 07:40 pm
Powered by Dreamwidth Studios