Commit 634c4978 authored by David Nichols's avatar David Nichols
Browse files

added CSV and Excel building blocks + tests

parent 75b8e829
Pipeline #25492 passed with stage
in 17 minutes and 4 seconds
......@@ -42,9 +42,25 @@ public class ExcelIterator extends qore.Qore.AbstractIterator {
public ExcelIterator(java.io.InputStream stream, String sheet_name) throws Throwable {
XSSFWorkbook workbook = new XSSFWorkbook(stream);
sheet = workbook.getSheet(sheet_name);
if (sheet == null) {
throw new RuntimeException(String.format("sheet %s is unknown", sheet));
// Resolve the worksheet to iterate:
//  - no name given: use the first worksheet
//  - otherwise look the sheet up by name and, failing that, interpret the value as a 0-based sheet index
if (sheet_name == null || sheet_name.isEmpty()) {
    // Workbook.getSheetAt() throws IllegalArgumentException for an out-of-range index instead of
    // returning null, so the empty-workbook case must be detected before the call
    if (workbook.getNumberOfSheets() == 0) {
        throw new RuntimeException("the spreadsheet has no worksheets");
    }
    sheet = workbook.getSheetAt(0);
} else {
    sheet = workbook.getSheet(sheet_name);
    if (sheet == null) {
        try {
            int sheet_no = Integer.parseInt(sheet_name);
            // only use the index if it is in range; an out-of-range index is reported as an unknown
            // sheet below rather than escaping as a raw IllegalArgumentException
            if (sheet_no >= 0 && sheet_no < workbook.getNumberOfSheets()) {
                sheet = workbook.getSheetAt(sheet_no);
            }
        } catch (NumberFormatException e) {
            // the value is not a number, so it cannot be a sheet index; fall through to the error below
        }
    }
    if (sheet == null) {
        // report the requested name; "sheet" itself is always null at this point
        throw new RuntimeException(String.format("sheet %s is unknown", sheet_name));
    }
    // NOTE(review): this writes to stdout on every successful named lookup; looks like leftover debug
    // output - confirm whether it should go through a logger or be removed
    System.out.printf("using sheet %s\n", sheet_name);
}
}
......
......@@ -111,34 +111,41 @@ public class ExcelReadDataProvider inherits DataProvider::AbstractDataProvider {
/** @param path the input file to iterate
@param opts a hash of optional options
@throw ABSTRACTEXCELITERATOR-ERROR invalid or unknown option; invalid data type for option; \c "header_names"
@throw EXCEL-READ-OPTION-ERROR invalid or unknown option; invalid data type for option; \c "header_names"
is @ref True "True" and \c "headers" is also present; unknown field type
@throw EXCEL-INVALID-LOCATION invalid excel location or range string
*/
constructor(string path, *hash<auto> opts) {
name = path;
self.path = path;
self.opts = parseOptions(opts);
self.opts = processCellOptions(checkOptions("EXCEL-READ-OPTION-ERROR", ConstructorOptions, opts));
}
#! Creates the ExcelReadDataProvider with the input data stream and optionally an option hash
/** @param stream the input stream to iterate
@param opts a hash of optional options
@throw ABSTRACTEXCELITERATOR-ERROR invalid or unknown option; invalid data type for option; \c "header_names"
@throw EXCEL-READ-OPTION-ERROR invalid or unknown option; invalid data type for option; \c "header_names"
is @ref True "True" and \c "headers" is also present; unknown field type
@throw EXCEL-INVALID-LOCATION invalid excel location or range string
*/
constructor(InputStream stream, *hash<auto> opts) {
i = stream;
self.opts = parseOptions(opts);
self.opts = processCellOptions(checkOptions("EXCEL-READ-OPTION-ERROR", ConstructorOptions, opts));
name = sprintf("stream=%s", stream.uniqueHash());
}
#! Creates the object from constructor options
/**
@throw EXCEL-READ-OPTION-ERROR invalid or unknown option; invalid data type for option; \c "header_names"
is @ref True "True" and \c "headers" is also present; unknown field type
@throw EXCEL-INVALID-LOCATION invalid excel location or range string
*/
constructor(*hash<auto> options) {
*hash<auto> copts = parseOptions(options);
*hash<auto> copts = processCellOptions(checkOptions("EXCEL-READ-OPTION-ERROR", ConstructorOptions, options));
if (copts.path) {
if (copts.stream) {
error("CONSTRUCTOR-ERROR", "cannot provide both \"stream\" and \"path\" options; use either one or "
error("EXCEL-READ-OPTION-ERROR", "cannot provide both \"stream\" and \"path\" options; use either one or "
"the other");
}
name = path = copts.path;
......@@ -158,9 +165,8 @@ public class ExcelReadDataProvider inherits DataProvider::AbstractDataProvider {
return name;
}
#! Parses options
*hash<auto> parseOptions(*hash<auto> options, string error = "CONSTRUCTOR-ERROR") {
*hash<auto> opts = checkOptions(error, ConstructorOptions, options);
#! Process cell options
static *hash<auto> processCellOptions(*hash<auto> opts) {
if (opts.header_cells) {
opts.header_cells = ExcelReadDataProvider::parseRange(opts.header_cells);
}
......@@ -168,11 +174,11 @@ public class ExcelReadDataProvider inherits DataProvider::AbstractDataProvider {
opts.data_cells = ExcelReadDataProvider::parseRange(opts.data_cells);
if (opts.header_cells) {
if (opts.header_cells.start.column > opts.data_cells.start.column) {
throw "INVALID-RANGE", sprintf("start column for header cells (%y) is after the start column for "
throw "EXCEL-INVALID-LOCATION", sprintf("start column for header cells (%y) is after the start column for "
"data cells (%y)", opts.header_cells.start, opts.data_cells.start);
}
if (opts.header_cells.start.row > opts.data_cells.start.row) {
throw "INVALID-RANGE", sprintf("start row for header cells (%y) is after the start row for data "
throw "EXCEL-INVALID-LOCATION", sprintf("start row for header cells (%y) is after the start row for data "
"cells (%y)", opts.header_cells.start, opts.data_cells.start);
}
}
......@@ -181,12 +187,12 @@ public class ExcelReadDataProvider inherits DataProvider::AbstractDataProvider {
}
#! Parses a string that designates a range of cells (ex: \c "a1:e10")
/** @throw INVALID-RANGE invalid range string
/** @throw EXCEL-INVALID-LOCATION invalid excel location or range string
*/
static hash<CellRangeInfo> parseRange(string range) {
(*string start, *string end) = (range =~ x/^((?:[a-z]+)?[0-9]+)(?::((?:[a-z]+)?[0-9]+))?$/i);
if (!start) {
throw "INVALID-RANGE", sprintf("range %y does not have the format [a-z]+[0-9]+:[a-z]+[0-9]+", range);
throw "EXCEL-INVALID-LOCATION", sprintf("range %y does not have the format [a-z]+[0-9]+:[a-z]+[0-9]+", range);
}
hash<CellRangeInfo> rv = <CellRangeInfo>{
"start": ExcelReadDataProvider::parseCellLocation(start),
......@@ -194,10 +200,10 @@ public class ExcelReadDataProvider inherits DataProvider::AbstractDataProvider {
if (end) {
rv.end = ExcelReadDataProvider::parseCellLocation(end);
if (rv.start.column > rv.end.column) {
throw "INVALID-RANGE", sprintf("start column (%y) is after the end column (%y)", start, end);
throw "EXCEL-INVALID-LOCATION", sprintf("start column (%y) is after the end column (%y)", start, end);
}
if (rv.start.row > rv.end.row) {
throw "INVALID-RANGE", sprintf("start row (%y) is after the end row (%y)", start, end);
throw "EXCEL-INVALID-LOCATION", sprintf("start row (%y) is after the end row (%y)", start, end);
}
} else {
rv.end = <CellLocationInfo>{
......@@ -209,7 +215,7 @@ public class ExcelReadDataProvider inherits DataProvider::AbstractDataProvider {
}
#! Parses a string that designates a cell location (ex: \c "a1")
/** @throw INVALID-LOCATION invalid cell location
/** @throw EXCEL-INVALID-LOCATION invalid excel location or range string
*/
static hash<CellLocationInfo> parseCellLocation(string location) {
*list<*string> row_values = (location =~ x/^(?:[a-z]+)?([0-9]+)$/i);
......@@ -218,7 +224,7 @@ public class ExcelReadDataProvider inherits DataProvider::AbstractDataProvider {
"row": (row_values[1] ?? row_values[0] ?? -1).toInt(),
};
if (rv.row < 1) {
throw "INVALID-LOCATION", sprintf("row %y is invalid, rows numbers must be >= 1", rv.row);
throw "EXCEL-INVALID-LOCATION", sprintf("row %y is invalid, rows numbers must be >= 1", rv.row);
}
return rv;
}
......
......@@ -45,8 +45,7 @@ public class ExcelWorksheetReadDataProvider inherits DataProvider::AbstractDataP
const ConstructorOptions = ExcelReadDataProvider::ConstructorOptions + {
"worksheet": <DataProviderOptionInfo>{
"type": AbstractDataProviderType::get(StringType),
"desc": "the name of the worksheet to iterate",
"required": True,
"desc": "the name of the worksheet to iterate; if missing, the first worksheet is processed",
},
};
......@@ -95,6 +94,11 @@ public class ExcelWorksheetReadDataProvider inherits DataProvider::AbstractDataP
self.name = name;
self.sheet = sheet;
self.opts = opts;
setOptions();
}
#! Common setup code
private setOptions() {
if (opts.header_cells) {
i.setHeaderCells(opts.header_cells.start.column, opts.header_cells.start.row,
opts.header_cells.end.column, opts.header_cells.end.row);
......@@ -117,22 +121,24 @@ public class ExcelWorksheetReadDataProvider inherits DataProvider::AbstractDataP
#! Creates the object from constructor options
constructor(*hash<auto> options) {
*hash<auto> copts = checkOptions("CONSTRUCTOR-ERROR", ConstructorOptions, options);
sheet = copts.sheet;
*hash<auto> copts = ExcelReadDataProvider::processCellOptions(checkOptions("EXCEL-READ-OPTION-ERROR",
ConstructorOptions, options));
sheet = copts.worksheet ?? "";
if (copts.path) {
if (copts.stream) {
error("CONSTRUCTOR-ERROR", "cannot provide both \"stream\" and \"path\" options; use either one or "
error("EXCEL-READ-OPTION-ERROR", "cannot provide both \"stream\" and \"path\" options; use either one or "
"the other");
}
i = new ExcelIterator(new FileInputStream(copts.path), sheet);
i = new ExcelIterator(copts.path, sheet);
} else if (copts.stream) {
i = new ExcelIterator(copts.stream, sheet);
}
self.opts = copts - ("stream", "path");
self.opts = copts - ("stream", "path", "worksheet");
if (!i) {
# create a dummy iterator with no input
i = new ExcelIterator(new StringInputStream(""), sheet);
}
setOptions();
}
#! Returns an iterator for zero or more records matching the search options
......
......@@ -9,11 +9,11 @@ Building blocks are meant to provide reusable elements to solve technical challe
**NOTE: The building blocks here are made available under the Apache license 2.0:**
>
> Copyright 2021 Qore Technologies, s.r.o.
>
>
> Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
>
>
> http://www.apache.org/licenses/LICENSE-2.0
>
>
> Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Contact Qore Technologies (info@qoretechnologies.com) if you need support for these building blocks.
......@@ -36,6 +36,7 @@ The following building blocks are available:
|[BBM_AutoMapperRecord](./generic/dataprovider/BBM_AutoMapperRecord-1.0.qclass.yaml)|base building block for running a mapper in autonomous mode with request/record-based recovery logic
|[BBM_AutoMapperRecordStep](./step/dataprovider/BBM_AutoMapperRecordStep-1.0.qclass.yaml)|step building block to map record-based data from an output provider to an input provider with config-based error recovery support
|[BBM_AutoMapperRequest](./generic/dataprovider/BBM_AutoMapperRequest-1.0.qclass.yaml)|base building block for running a mapper in autonomous mode with request/response-based recovery logic
|[BBM_CsvReadDataProvider](./generic/dataprovider/BBM_CsvReadDataProvider-1.0.qclass.yaml)|base building block for processing CSV data as input; implements an output connector
|[BBM_DataProviderRecordCreate](./generic/dataprovider/BBM_DataProviderRecordCreate-1.0.qclass.yaml)|building block for record-based data providers for creating records
|[BBM_DataProviderRecordUpdate](./generic/dataprovider/BBM_DataProviderRecordUpdate-1.0.qclass.yaml)|building block for record-based data providers for updating records, includes an input/output connector
|[BBM_DataProviderRecordCreateBase](./generic/dataprovider/BBM_DataProviderRecordCreateBase-1.0.qclass.yaml)|base building block for record-based data providers for record creation
......@@ -46,6 +47,7 @@ The following building blocks are available:
|[BBM_DataProviderRequestWithRecoveryStep](./step/dataprovider/BBM_DataProviderRequestWithRecoveryStep-1.0.qstep.yaml)|step building block for request-reply data providers with config-based error recovery support
|[BBM_DataProviderSearch](./generic/dataprovider/BBM_DataProviderSearch-1.0.qclass.yaml)|performs a search in a record-based data provider and returns the result, includes input/output connectors
|[BBM_DataProviderSearchBase](./generic/dataprovider/BBM_DataProviderSearchBase-1.0.qclass.yaml)|base class for using the data provider search API
|[BBM_ExcelReadDataProvider](./generic/dataprovider/BBM_ExcelReadDataProvider-1.0.qclass.yaml)|base building block for processing Excel spreadsheet data as input; implements an output connector
|[BBM_GenericMapper](./generic/dataprovider/BBM_GenericMapper-1.0.qclass.yaml)|building block for generic data transformation support based on a mapper; includes an input/output connector
### Filesystem Event Building Blocks
......@@ -115,7 +117,7 @@ The following building blocks are available:
|Building Block|Description|
|---|---|
|[BBM_SalesforceStreamBase](./service/salesforce/BBM_SalesforceStreamBase-v1.0.qclass.yaml)|base class for **Salesforce** streaming API support, provides an event source connector
|[BBM_SalesforceStreamCreateOrder](./service/salesforce/BBM_SalesforceStreamCreateOrder-v1.0.qclass.yaml)|base class for creating workflow orders based on **Salesforce** events
|[BBM_SalesforceStreamCreateOrder](./service/salesforce/BBM_SalesforceStreamCreateOrder-v1.0.qclass.yaml)|base class for creating workflow orders based on **Salesforce** events
### WebSocket Server Building Blocks
|Building Block|Description|
......
%new-style
%strict-args
%require-types
%enable-all-warnings
%requires CsvUtil
#! Config-driven CSV read data provider building block
/** The constructor options for the parent CsvReadDataProvider class are taken from config items; each
    \c "csv-read-*" config item corresponds to the parent constructor option with the same name, where dashes
    in the config item name replace underscores in the option name (see \c KeyMap)
*/
class BBM_CsvReadDataProvider inherits CsvReadDataProvider {
    public {
        # map config item names to data provider options
        const KeyMap = map {
            ("csv-read-" + regex_subst($1, "_", "-", RE_Global)): $1,
        }, keys CsvReadDataProvider::ConstructorOptions;
    }

    #! Creates the object from the current config item values
    /** @throw CSV-READ-ERROR neither the \c "path" nor the \c "stream" option is set
    */
    constructor() : CsvReadDataProvider(BBM_CsvReadDataProvider::getOptions()) {
    }

    #! Search records connector
    /** input data is ignored

        output data: list of record hashes
    */
    *list<hash<auto>> searchRecordsConnector(auto ignored) {
        return map $1, searchRecords(
            UserApi::getConfigItemValue("csv-read-search-where"),
            UserApi::getConfigItemValue("csv-read-search-options"),
        );
    }

    #! Returns the data provider constructor options built from the config item values
    /** Only config items that have a value and that correspond to a data provider constructor option are
        included; all other config items (such as the search config items used by the connector) are ignored

        @return the option hash for the parent constructor

        @throw CSV-READ-ERROR neither the \c "path" nor the \c "stream" option is set
    */
    static hash<auto> getOptions() {
        # map from config item names to data provider names in the option hash; remove empty values and
        # skip config items that are not constructor options, as they have no entry in KeyMap and would
        # otherwise be passed to the parent constructor under an invalid key
        # NOTE: declared as "*hash" as the map expression is assumed to yield no value when no config item
        # qualifies; this way the descriptive error below is raised instead of a type error
        *hash<auto> opts = map {KeyMap{$1.key}: $1.value}, UserApi::getConfigItemHash().pairIterator(),
            KeyMap.hasKey($1.key) && exists $1.value;
        if (!opts.path && !opts.stream) {
            throw "CSV-READ-ERROR", sprintf("either the 'path' or 'stream' option is required; options provided: %y",
                keys opts);
        }
        UserApi::logInfo("csv read options: %y", opts);
        return opts;
    }
}
# This is a generated file, don't edit!
type: class
name: BBM_CsvReadDataProvider
desc: >-
Provides a config-based data provider object for data from CSV input data
lang: qore
author:
- Qore Technologies, s.r.o.
version: '1.0'
class-connectors:
- name: searchRecordsConnector
type: input-output
method: searchRecordsConnector
code: BBM_CsvReadDataProvider-1.0.qclass
config-items:
- name: csv-read-path
default_value:
null
description: The path to the CSV data; mutually exclusive with `csv-read-stream`
config_group: CSV Read Data Provider Main Options
type: "*string"
- name: csv-read-stream
default_value:
null
description: >-
A Java or Qore `InputStream` object providing the CSV data; mutually exclusive
with `csv-read-path`
config_group: CSV Read Data Provider Main Options
type: "*string"
- name: csv-read-tolwr
default_value:
false
description: >-
If `true` then all automatically-detected header names will be converted to
lower case.
config_group: CSV Read Data Provider Header Options
type: bool
- name: csv-read-header-lines
default_value:
null
description: >
The number of header lines in the CSV data
config_group: CSV Read Data Provider Header Options
type: "*int"
- name: csv-read-header-names
default_value:
false
description: >-
if `true` then the object will parse the header names from the first header
row, in this case if `header_lines` is not set explicitly, it will be
assumed to be `1`
config_group: CSV Read Data Provider Header Options
type: bool
- name: csv-read-header-reorder
default_value:
true
description: >-
if `true` (the default) then if `headers` are provided, then data fields are reordered to
follow headers
config_group: CSV Read Data Provider Header Options
type: bool
- name: csv-read-headers
default_value:
null
description: >-
The list of header / column names for the data iterated; if this is present,
then `header_names` must be `false`
config_group: CSV Read Data Provider Header Options
type: "*list"
- name: csv-read-eol
default_value:
null
description: >-
The end of line characters used in the input data.
If not set the end of line characters will be automatically detected.
config_group: CSV Read Data Provider Field Options
type: "*string"
- name: csv-read-fields
default_value:
null
description: >-
The fields in the CSV data; keys are field names as given by the `header_names`
or `headers` options (in case neither of these options are used, then field names
are numbers starting with `0`) and the values are either strings (one of `bool`,
`int`, `float`, `number`, `string`, `date`, optionally prefixed by `*` if the
value is optional) or a hash describing the field; also sets `headers` if not set
automatically with `header_names`; if no field type is given, the default is
`*string`; note that invalid field names given in this option are ignored
config_group: CSV Read Data Provider Field Options
type: "*hash"
- name: csv-read-separator
default_value:
null
description: >-
The string separating the fields in the file (default: `,`).
config_group: CSV Read Data Provider Field Options
type: "*string"
- name: csv-read-quote
default_value:
"\""
description: >-
the field quote character (default: `\"`)
config_group: CSV Read Data Provider Field Options
type: "*string"
- name: csv-read-verify-columns
default_value:
true
description: >-
if `true` (the default value of this config item; the underlying option defaults to `false`), an exception
is raised if a line is parsed with a different column or field count than other lines
config_group: CSV Read Data Provider Field Options
type: bool
- name: csv-read-date-format
default_value:
null
description: >-
The date format to use when parsing date/time values (ex:
`DD.MM.YYYY`).
If not set then ISO-8601 format is assumed (i.e. `YYYY-MM-DD HH:mm:SS`)
config_group: CSV Read Data Provider Data Options
type: "*string"
- name: csv-read-encoding
default_value:
null
description: >-
The character encoding used for the input data.
If not set then `UTF-8` is assumed
config_group: CSV Read Data Provider Data Options
type: "*string"
- name: csv-read-ignore-empty
default_value:
false
description: >-
if `true` then empty lines will be ignored
config_group: CSV Read Data Provider Data Options
type: bool
- name: csv-read-ignore-whitespace
default_value:
false
description: >-
if `true`, leading and trailing whitespace will be stripped from non-quoted fields
config_group: CSV Read Data Provider Data Options
type: bool
- name: csv-read-number-format
default_value:
null
description: >-
the default format for `int`, `float`, and `number` fields as a string giving the thousands
separator character followed by the decimal separator character (ex: `.,` for
continental-European-style numbers)
config_group: CSV Read Data Provider Data Options
type: "*string"
- name: csv-read-timezone
default_value:
null
description: >-
The timezone region to use when creating date/time values (ex:
`Europe/Prague`).
If not set then all dates are assumed to be in the server's time zone.
config_group: CSV Read Data Provider Data Options
type: "*string"
- name: csv-read-search-where
default_value:
null
description: Search "where" criteria for the `searchRecords` connector
config_group: CSV Read Data Provider Search Options
type: "*hash"
- name: csv-read-search-options
default_value:
null
description: Search options for the `searchRecords` connector
config_group: CSV Read Data Provider Search Options
type: "*hash"
%new-style
%strict-args
%require-types
%enable-all-warnings
%requires ExcelDataProvider
#! Config-driven Excel worksheet read data provider building block
/** The constructor options for the parent ExcelWorksheetReadDataProvider class are taken from config items;
    each \c "excel-read-*" config item corresponds to the parent constructor option with the same name, where
    dashes in the config item name replace underscores in the option name (see \c KeyMap)
*/
class BBM_ExcelReadDataProvider inherits ExcelWorksheetReadDataProvider {
    public {
        # map config item names to data provider options
        const KeyMap = map {
            ("excel-read-" + regex_subst($1, "_", "-", RE_Global)): $1,
        }, keys ExcelWorksheetReadDataProvider::ConstructorOptions;
    }

    #! Creates the object from the current config item values
    /** @throw EXCEL-READ-ERROR neither the \c "path" nor the \c "stream" option is set
    */
    constructor() : ExcelWorksheetReadDataProvider(BBM_ExcelReadDataProvider::getOptions()) {
    }

    #! Search records connector
    /** input data is ignored

        output data: list of record hashes
    */
    *list<hash<auto>> searchRecordsConnector(auto ignored) {
        return map $1, searchRecords(
            UserApi::getConfigItemValue("excel-read-search-where"),
            UserApi::getConfigItemValue("excel-read-search-options"),
        );
    }

    #! Returns the data provider constructor options built from the config item values
    /** Only config items that have a value and that correspond to a data provider constructor option are
        included; all other config items (such as the search config items used by the connector) are ignored

        @return the option hash for the parent constructor

        @throw EXCEL-READ-ERROR neither the \c "path" nor the \c "stream" option is set
    */
    static hash<auto> getOptions() {
        # map from config item names to data provider names in the option hash; remove empty values and
        # skip config items that are not constructor options, as they have no entry in KeyMap and would
        # otherwise be passed to the parent constructor under an invalid key
        # NOTE: declared as "*hash" as the map expression is assumed to yield no value when no config item
        # qualifies; this way the descriptive error below is raised instead of a type error
        *hash<auto> opts = map {KeyMap{$1.key}: $1.value}, UserApi::getConfigItemHash().pairIterator(),
            KeyMap.hasKey($1.key) && exists $1.value;
        if (!opts.path && !opts.stream) {
            throw "EXCEL-READ-ERROR", sprintf("either the 'path' or 'stream' option is required; options "
                "provided: %y", keys opts);
        }
        UserApi::logInfo("excel read options: %y", opts);
        return opts;
    }
}
# This is a generated file, don't edit!
type: class
name: BBM_ExcelReadDataProvider
desc: >-
Provides a config-based data provider object for data from a **[Microsoft
Excel](https://www.microsoft.com/en-us/microsoft-365/excel)** spreadsheet
lang: qore
author:
- Qore Technologies, s.r.o.
version: '1.0'
class-connectors:
- name: searchRecordsConnector
type: input-output
method: searchRecordsConnector
code: BBM_ExcelReadDataProvider-1.0.qclass
config-items:
- name: excel-read-worksheet
default_value:
null
description: >
The name of the worksheet to process; if missing, the first worksheet is
used
config_group: Excel Read Data Provider
type: "*string"
- name: excel-read-data-cells
default_value:
null
description: >-
The range of cells for data to process; this can be one of the following
options:
- a range of rows: ex **1:20**
- a range of specific cells: ex **B2:K20**
If this value is not provided, then data processing starts at the first row
of the spreadsheet or the first row of the spreadsheet after the last header
row, and columns are detected automatically; the first empty cell is used as
the end of data for the row.
config_group: Excel Read Data Provider
type: "*string"
- name: excel-read-header-cells
default_value:
null
description: >
The range of cells for headers; possible values are:
- a single row number: ex **1**
- a range of row numbers: ex **2:5**
- a range of cells: ex **A1:H1**
When using row numbers, header cells are detected automatically; the first
empty cell signals the end of header data.
config_group: Excel Read Data Provider
type: "*string"
- name: excel-read-header-names
default_value:
false
description: >-
if `true` then the object will parse the header names from the first header
row, in this case if `header_cells` is not set explicitly, it will be
assumed to be `1`
config_group: Excel Read Data Provider
type: bool
- name: excel-read-headers
default_value:
null
description: >-
The list of header / column names for the data iterated; if this is present,
then `header_names` must be `false`
config_group: Excel Read Data Provider
type: "*list"
- name: excel-read-path
default_value:
null
description: The path to the Excel data; mutually exclusive with `excel-read-stream`
config_group: Excel Read Data Provider
type: "*string"
- name: excel-read-stream
default_value:
null
description: >-
A Java or Qore `InputStream` object providing the binary Excel spreadsheet
data; mutually exclusive with `excel-read-path`
config_group: Excel Read Data Provider
type: "*string"
- name: excel-read-timezone
default_value:
null
description: >-
The timezone region to use when creating date/time values (ex:
`Europe/Prague`).