mirror of https://github.com/mongodb/mongo
SERVER-109047: import avro cpp library (#40107)
GitOrigin-RevId: f117bdb2a2937d4653d04f7fe83af8e7e8961d5a
This commit is contained in:
parent
334f29bbe6
commit
7d284d9cfb
|
|
@ -3036,6 +3036,7 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
|
|||
# The following patterns are parsed from ./src/third_party/OWNERS.yml
|
||||
/src/third_party/**/abseil-cpp @10gen/server-programmability @svc-auto-approve-bot
|
||||
/src/third_party/**/asio @10gen/server-networking-and-observability @svc-auto-approve-bot
|
||||
/src/third_party/**/avro-cpp @10gen/streams-engine @svc-auto-approve-bot
|
||||
/src/third_party/**/aws-sdk @10gen/streams-engine @svc-auto-approve-bot
|
||||
/src/third_party/**/benchmark @10gen/server-programmability @svc-auto-approve-bot
|
||||
/src/third_party/**/boost @10gen/server-programmability @svc-auto-approve-bot
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ a notice will be included in
|
|||
| [Abseil] | Apache-2.0 | 20250512.1 | | ✗ |
|
||||
| [arximboldi/immer] | BSL-1.0 | Unknown | | ✗ |
|
||||
| [Asio C++ Library] | BSL-1.0 | 1.12.2 | | ✗ |
|
||||
| [Apache Avro C++] | Apache-2.0 | 1.12.0 | | ✗ |
|
||||
| [aws-sdk - the AWS SDK client library] | Apache-2.0 | 1.11.471 | | ✗ |
|
||||
| [benchmark] | Apache-2.0 | v1.5.2 | | |
|
||||
| [Boost C++ Libraries - boost] | BSL-1.0 | 1.88.0 | | ✗ |
|
||||
|
|
@ -79,6 +80,7 @@ a notice will be included in
|
|||
|
||||
[Abseil]: https://github.com/abseil/abseil-cpp
|
||||
[Asio C++ Library]: https://github.com/chriskohlhoff/asio
|
||||
[Apache Avro C++]: https://avro.apache.org/
|
||||
[Boost C++ Libraries - boost]: http://www.boost.org/
|
||||
[Cyrus SASL]: https://www.cyrusimap.org/sasl/
|
||||
[ICU for C/C++ (ICU4C)]: http://site.icu-project.org/download/
|
||||
|
|
|
|||
56
sbom.json
56
sbom.json
|
|
@ -252,6 +252,57 @@
|
|||
},
|
||||
"scope": "required"
|
||||
},
|
||||
{
|
||||
"type": "library",
|
||||
"bom-ref": "pkg:github/apache/avro@release-1.12.0",
|
||||
"supplier": {
|
||||
"name": "The Apache Software Foundation",
|
||||
"url": [
|
||||
"https://www.apache.org/"
|
||||
]
|
||||
},
|
||||
"author": "Apache Avro Developers",
|
||||
"group": "apache",
|
||||
"name": "Apache Avro C++",
|
||||
"version": "1.12.0",
|
||||
"description": "Apache Avro is a data serialization system. This is the C++ implementation.",
|
||||
"licenses": [
|
||||
{
|
||||
"license": {
|
||||
"id": "Apache-2.0"
|
||||
}
|
||||
}
|
||||
],
|
||||
"copyright": "Copyright 2010-2024 The Apache Software Foundation",
|
||||
"cpe": "cpe:2.3:a:apache:avro:1.12.0:*:*:*:*:*:*:*",
|
||||
"purl": "pkg:github/apache/avro@release-1.12.0",
|
||||
"properties": [
|
||||
{
|
||||
"name": "internal:team_responsible",
|
||||
"value": "Server Programmability"
|
||||
},
|
||||
{
|
||||
"name": "emits_persisted_data",
|
||||
"value": "false"
|
||||
},
|
||||
{
|
||||
"name": "info_link",
|
||||
"value": "https://avro.apache.org/"
|
||||
},
|
||||
{
|
||||
"name": "import_script_path",
|
||||
"value": "src/third_party/avro-cpp/scripts/import.sh"
|
||||
}
|
||||
],
|
||||
"evidence": {
|
||||
"occurrences": [
|
||||
{
|
||||
"location": "src/third_party/avro-cpp"
|
||||
}
|
||||
]
|
||||
},
|
||||
"scope": "required"
|
||||
},
|
||||
{
|
||||
"type": "library",
|
||||
"bom-ref": "pkg:github/google/benchmark@v1.5.2",
|
||||
|
|
@ -2699,6 +2750,7 @@
|
|||
"pkg:github/aappleby/smhasher@a6bd3ce7be8ad147ea820a7cf6229a975c0c96bb",
|
||||
"pkg:github/abseil/abseil-cpp@20250512.1",
|
||||
"pkg:github/antirez/linenoise@6cdc775807e57b2c3fd64bd207814f8ee1fe35f3",
|
||||
"pkg:github/apache/avro@release-1.12.0",
|
||||
"pkg:github/arximboldi/immer@v0.8.0",
|
||||
"pkg:github/aws/aws-sdk-cpp@1.11.471",
|
||||
"pkg:github/boostorg/boost@boost-1.88.0",
|
||||
|
|
@ -2777,6 +2829,10 @@
|
|||
"ref": "pkg:github/arximboldi/immer@v0.8.0",
|
||||
"dependsOn": []
|
||||
},
|
||||
{
|
||||
"ref": "pkg:github/apache/avro@release-1.12.0",
|
||||
"dependsOn": []
|
||||
},
|
||||
{
|
||||
"ref": "pkg:github/aws/aws-sdk-cpp@1.11.471",
|
||||
"dependsOn": []
|
||||
|
|
|
|||
|
|
@ -6,6 +6,9 @@ filters:
|
|||
- "asio":
|
||||
approvers:
|
||||
- 10gen/server-networking-and-observability
|
||||
- "avro-cpp":
|
||||
approvers:
|
||||
- 10gen/streams-engine
|
||||
- "aws-sdk":
|
||||
approvers:
|
||||
- 10gen/streams-engine
|
||||
|
|
|
|||
|
|
@ -0,0 +1,69 @@
|
|||
load("//bazel:mongo_src_rules.bzl", "mongo_cc_library", "mongo_cc_unit_test")
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
mongo_cc_library(
|
||||
name = "avro-cpp",
|
||||
srcs = glob(
|
||||
[
|
||||
"dist/impl/*.cc",
|
||||
"dist/impl/parsing/*.cc",
|
||||
"dist/impl/json/*.cc",
|
||||
],
|
||||
exclude = [
|
||||
"dist/impl/avrogencpp.cc", # Code generator tool with heavy boost deps
|
||||
"dist/impl/DataFile.cc", # File I/O with boost iostreams deps
|
||||
],
|
||||
) + [
|
||||
"//src/third_party/boost:headers", # For boost headers
|
||||
],
|
||||
hdrs = glob([
|
||||
"dist/include/avro/*.hh",
|
||||
"dist/include/avro/buffer/*.hh",
|
||||
"dist/include/avro/buffer/detail/*.hh",
|
||||
"dist/impl/json/*.hh",
|
||||
"dist/impl/parsing/*.hh",
|
||||
]),
|
||||
copts = [
|
||||
"-DAVRO_SOURCE",
|
||||
# Disable warnings that might cause build failures
|
||||
"-Wno-unused-parameter",
|
||||
"-Wno-unused-but-set-variable",
|
||||
"-Wno-unused-exception-parameter",
|
||||
"-Wno-implicit-fallthrough",
|
||||
"-Wno-sign-compare",
|
||||
] + select({
|
||||
"//bazel/config:compiler_type_gcc": [
|
||||
"-Wno-maybe-uninitialized",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
includes = [
|
||||
"dist/impl", # For impl internal headers
|
||||
"dist/impl/json", # For json internal headers
|
||||
"dist/impl/parsing", # For parsing internal headers
|
||||
"dist/include",
|
||||
"dist/include/avro", # For relative includes in source files
|
||||
"dist/include/avro/buffer", # For buffer detail headers
|
||||
],
|
||||
deps = [
|
||||
"//src/third_party/fmt",
|
||||
"//src/third_party/zlib",
|
||||
"//src/mongo/util:boost_assert_shim", # Required when using boost headers
|
||||
"//src/third_party/boost:boost_program_options", # For boost/program_options.hpp
|
||||
"//src/third_party/boost:boost_iostreams", # For boost/iostreams/*
|
||||
# Note: Other boost headers (algorithm, any, utility, etc.) are header-only
|
||||
# and should be available through the boost libraries above
|
||||
# Note: Snappy and zstd are optional compression codecs
|
||||
],
|
||||
# Optional: Add linkopts if needed
|
||||
# linkopts = [],
|
||||
)
|
||||
|
||||
# test compilation for Avro C++ library
|
||||
mongo_cc_unit_test(
|
||||
name = "avro_tests",
|
||||
srcs = ["test_compilation.cc"],
|
||||
tags = ["mongo_unittest_fourth_group"],
|
||||
deps = [":avro-cpp"],
|
||||
)
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
|
||||
See https://avro.apache.org/ for a list of authors
|
||||
|
||||
|
||||
|
|
@ -0,0 +1 @@
|
|||
Refer to CHANGES.txt in the root of avro repository for change log
|
||||
|
|
@ -0,0 +1,231 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
https://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
https://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
----------------------------------------------------------------------
|
||||
License for the AVRO_BOOT_NO_TRAIT code in the C++ implementation:
|
||||
File: lang/c++/api/Boost.hh
|
||||
|
||||
| Boost Software License - Version 1.0 - August 17th, 2003
|
||||
|
|
||||
| Permission is hereby granted, free of charge, to any person or organization
|
||||
| obtaining a copy of the software and accompanying documentation covered by
|
||||
| this license (the "Software") to use, reproduce, display, distribute,
|
||||
| execute, and transmit the Software, and to prepare derivative works of the
|
||||
| Software, and to permit third-parties to whom the Software is furnished to
|
||||
| do so, all subject to the following:
|
||||
|
|
||||
| The copyright notices in the Software and this entire statement, including
|
||||
| the above license grant, this restriction and the following disclaimer,
|
||||
| must be included in all copies of the Software, in whole or in part, and
|
||||
| all derivative works of the Software, unless such copies or derivative
|
||||
| works are solely in the form of machine-executable object code generated by
|
||||
| a source language processor.
|
||||
|
|
||||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
| FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||
| SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||
| FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||
| ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
| DEALINGS IN THE SOFTWARE.
|
||||
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
|
||||
For news, visit the Avro web site at
|
||||
https://avro.apache.org/
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
Apache Avro
|
||||
Copyright 2010-2015 The Apache Software Foundation
|
||||
|
||||
This product includes software developed at
|
||||
The Apache Software Foundation (https://www.apache.org/).
|
||||
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
Avro C++ README.txt
|
||||
|
||||
The C++ port is thus far incomplete. Currently, it contains:
|
||||
|
||||
- Serializer/Parser- objects for writing/reading raw binary.
|
||||
|
||||
- xxxSchema- objects for composing schemas.
|
||||
|
||||
- ValidSchema- a schema object that has been converted to a parse tree
|
||||
(with some sanity checks).
|
||||
|
||||
- ValidSchema.toJson() writes the schema as a json object.
|
||||
|
||||
- ValidatingSerializer/ValidatingParser- check that reads/writes
|
||||
match the expected schema type (more expensive than the raw
|
||||
serializer/parser but they detect errors, and allow dynamic
|
||||
discovery of parsed data/attributes).
|
||||
|
||||
- Compiler (compileJsonSchema())- converts a Json string schema to a
|
||||
ValidSchema.
|
||||
|
||||
- Code Generation (experimental) - given a schema it generates C++
|
||||
objects of the same data types, and the code to serialize and parse
|
||||
it.
|
||||
|
||||
What's missing: Rpc containers are not yet implemented. Documentation is sparse.
|
||||
|
||||
INSTRUCTIONS
|
||||
|
||||
Pre-requisites:
|
||||
|
||||
To compile requires boost headers. Optionally, it requires Snappy compression library. If Snappy is available, it builds support for Snappy compression and skips it otherwise. (Please see your OS-specific instructions on how to install Boost and Snappy for your OS).
|
||||
|
||||
Building requires CMake 3.5 or later and a compiler supporting C++17 or later.
|
||||
|
||||
To generate a Makefile under Unix, MacOS (using GNU) or Cygwin use:
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -G "Unix Makefiles" ..
|
||||
|
||||
If it doesn't work, either you are missing the Boost package or you need to help
|
||||
CMake locate it.
|
||||
|
||||
If the Makefile is configured correctly, then you can make and run tests:
|
||||
|
||||
make
|
||||
ctest
|
||||
|
||||
To install
|
||||
|
||||
make package
|
||||
|
||||
and then untar the generated .tar.gz file.
|
||||
|
||||
To build and test on MacOS (using Xcode)
|
||||
|
||||
mkdir build.mac
|
||||
cd build.mac
|
||||
cmake -G Xcode
|
||||
|
||||
xcodebuild -configuration Release
|
||||
ctest -C Release
|
||||
|
||||
If debug version is required, replace 'Release' above with 'Debug'.
|
||||
|
||||
Note: The LICENSE and NOTICE files in the lang/c++ source directory are used to
|
||||
build the binary distribution. The LICENSE and NOTICE information for the Avro
|
||||
C++ source distribution is in the root directory.
|
||||
|
|
@ -0,0 +1,220 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "Decoder.hh"
|
||||
#include "Exception.hh"
|
||||
#include "Zigzag.hh"
|
||||
#include <memory>
|
||||
|
||||
namespace avro {
|
||||
|
||||
using std::make_shared;
|
||||
|
||||
class BinaryDecoder : public Decoder {
|
||||
StreamReader in_;
|
||||
|
||||
void init(InputStream &is) final;
|
||||
void decodeNull() final;
|
||||
bool decodeBool() final;
|
||||
int32_t decodeInt() final;
|
||||
int64_t decodeLong() final;
|
||||
float decodeFloat() final;
|
||||
double decodeDouble() final;
|
||||
void decodeString(std::string &value) final;
|
||||
void skipString() final;
|
||||
void decodeBytes(std::vector<uint8_t> &value) final;
|
||||
void skipBytes() final;
|
||||
void decodeFixed(size_t n, std::vector<uint8_t> &value) final;
|
||||
void skipFixed(size_t n) final;
|
||||
size_t decodeEnum() final;
|
||||
size_t arrayStart() final;
|
||||
size_t arrayNext() final;
|
||||
size_t skipArray() final;
|
||||
size_t mapStart() final;
|
||||
size_t mapNext() final;
|
||||
size_t skipMap() final;
|
||||
size_t decodeUnionIndex() final;
|
||||
|
||||
int64_t doDecodeLong();
|
||||
size_t doDecodeItemCount();
|
||||
size_t doDecodeLength();
|
||||
void drain() final;
|
||||
};
|
||||
|
||||
// Factory: creates a new BinaryDecoder and returns it as a DecoderPtr.
DecoderPtr binaryDecoder() {
    auto decoder = make_shared<BinaryDecoder>();
    return decoder;
}
|
||||
|
||||
// Rebinds the internal stream reader to `is`; subsequent decode calls read
// from that stream.
void BinaryDecoder::init(InputStream &is) {
    in_.reset(is);
}
|
||||
|
||||
// Decoding a null reads nothing from the stream.
void BinaryDecoder::decodeNull() {
    // Intentionally empty.
}
|
||||
|
||||
bool BinaryDecoder::decodeBool() {
|
||||
auto v = in_.read();
|
||||
if (v == 0) {
|
||||
return false;
|
||||
} else if (v == 1) {
|
||||
return true;
|
||||
}
|
||||
throw Exception("Invalid value for bool: {}", v);
|
||||
}
|
||||
|
||||
int32_t BinaryDecoder::decodeInt() {
|
||||
auto val = doDecodeLong();
|
||||
if (val < INT32_MIN || val > INT32_MAX) {
|
||||
throw Exception("Value out of range for Avro int: {}", val);
|
||||
}
|
||||
return static_cast<int32_t>(val);
|
||||
}
|
||||
|
||||
int64_t BinaryDecoder::decodeLong() {
|
||||
return doDecodeLong();
|
||||
}
|
||||
|
||||
// Reads sizeof(float) raw bytes from the stream straight into a float.
// No byte-order conversion is performed here.
float BinaryDecoder::decodeFloat() {
    float decoded;
    auto *raw = reinterpret_cast<uint8_t *>(&decoded);
    in_.readBytes(raw, sizeof decoded);
    return decoded;
}
|
||||
|
||||
// Reads sizeof(double) raw bytes from the stream straight into a double.
// No byte-order conversion is performed here.
double BinaryDecoder::decodeDouble() {
    double decoded;
    auto *raw = reinterpret_cast<uint8_t *>(&decoded);
    in_.readBytes(raw, sizeof decoded);
    return decoded;
}
|
||||
|
||||
size_t BinaryDecoder::doDecodeLength() {
|
||||
ssize_t len = decodeInt();
|
||||
if (len < 0) {
|
||||
throw Exception("Cannot have negative length: {}", len);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
// Forwards to the stream reader's drain() with `false`.
// NOTE(review): the meaning of the flag is defined by StreamReader, which is
// not visible here — confirm before changing it.
void BinaryDecoder::drain() {
    constexpr bool kDrainFlag = false;
    in_.drain(kDrainFlag);
}
|
||||
|
||||
void BinaryDecoder::decodeString(std::string &value) {
|
||||
size_t len = doDecodeLength();
|
||||
value.resize(len);
|
||||
if (len > 0) {
|
||||
in_.readBytes(const_cast<uint8_t *>(
|
||||
reinterpret_cast<const uint8_t *>(value.c_str())),
|
||||
len);
|
||||
}
|
||||
}
|
||||
|
||||
void BinaryDecoder::skipString() {
|
||||
size_t len = doDecodeLength();
|
||||
in_.skipBytes(len);
|
||||
}
|
||||
|
||||
void BinaryDecoder::decodeBytes(std::vector<uint8_t> &value) {
|
||||
size_t len = doDecodeLength();
|
||||
value.resize(len);
|
||||
if (len > 0) {
|
||||
in_.readBytes(value.data(), len);
|
||||
}
|
||||
}
|
||||
|
||||
void BinaryDecoder::skipBytes() {
|
||||
size_t len = doDecodeLength();
|
||||
in_.skipBytes(len);
|
||||
}
|
||||
|
||||
void BinaryDecoder::decodeFixed(size_t n, std::vector<uint8_t> &value) {
|
||||
value.resize(n);
|
||||
if (n > 0) {
|
||||
in_.readBytes(value.data(), n);
|
||||
}
|
||||
}
|
||||
|
||||
// Skips exactly `n` bytes of the stream (no length prefix is read).
void BinaryDecoder::skipFixed(size_t n) {
    in_.skipBytes(n);
}
|
||||
|
||||
size_t BinaryDecoder::decodeEnum() {
|
||||
return static_cast<size_t>(doDecodeLong());
|
||||
}
|
||||
|
||||
size_t BinaryDecoder::arrayStart() {
|
||||
return doDecodeItemCount();
|
||||
}
|
||||
|
||||
size_t BinaryDecoder::doDecodeItemCount() {
|
||||
auto result = doDecodeLong();
|
||||
if (result < 0) {
|
||||
doDecodeLong();
|
||||
return static_cast<size_t>(-result);
|
||||
}
|
||||
return static_cast<size_t>(result);
|
||||
}
|
||||
|
||||
size_t BinaryDecoder::arrayNext() {
|
||||
return static_cast<size_t>(doDecodeLong());
|
||||
}
|
||||
|
||||
size_t BinaryDecoder::skipArray() {
|
||||
for (;;) {
|
||||
auto r = doDecodeLong();
|
||||
if (r < 0) {
|
||||
auto n = static_cast<size_t>(doDecodeLong());
|
||||
in_.skipBytes(n);
|
||||
} else {
|
||||
return static_cast<size_t>(r);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t BinaryDecoder::mapStart() {
|
||||
return doDecodeItemCount();
|
||||
}
|
||||
|
||||
size_t BinaryDecoder::mapNext() {
|
||||
return doDecodeItemCount();
|
||||
}
|
||||
|
||||
size_t BinaryDecoder::skipMap() {
|
||||
return skipArray();
|
||||
}
|
||||
|
||||
size_t BinaryDecoder::decodeUnionIndex() {
|
||||
return static_cast<size_t>(doDecodeLong());
|
||||
}
|
||||
|
||||
int64_t BinaryDecoder::doDecodeLong() {
|
||||
uint64_t encoded = 0;
|
||||
int shift = 0;
|
||||
uint8_t u;
|
||||
do {
|
||||
if (shift >= 64) {
|
||||
throw Exception("Invalid Avro varint");
|
||||
}
|
||||
u = in_.read();
|
||||
encoded |= static_cast<uint64_t>(u & 0x7f) << shift;
|
||||
shift += 7;
|
||||
} while (u & 0x80);
|
||||
|
||||
return decodeZigzag64(encoded);
|
||||
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,147 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "Encoder.hh"
|
||||
#include "Zigzag.hh"
|
||||
#include <array>
|
||||
|
||||
namespace avro {
|
||||
|
||||
using std::make_shared;
|
||||
|
||||
class BinaryEncoder : public Encoder {
|
||||
StreamWriter out_;
|
||||
|
||||
void init(OutputStream &os) final;
|
||||
void flush() final;
|
||||
int64_t byteCount() const final;
|
||||
void encodeNull() final;
|
||||
void encodeBool(bool b) final;
|
||||
void encodeInt(int32_t i) final;
|
||||
void encodeLong(int64_t l) final;
|
||||
void encodeFloat(float f) final;
|
||||
void encodeDouble(double d) final;
|
||||
void encodeString(const std::string &s) final;
|
||||
void encodeBytes(const uint8_t *bytes, size_t len) final;
|
||||
void encodeFixed(const uint8_t *bytes, size_t len) final;
|
||||
void encodeEnum(size_t e) final;
|
||||
void arrayStart() final;
|
||||
void arrayEnd() final;
|
||||
void mapStart() final;
|
||||
void mapEnd() final;
|
||||
void setItemCount(size_t count) final;
|
||||
void startItem() final;
|
||||
void encodeUnionIndex(size_t e) final;
|
||||
|
||||
void doEncodeLong(int64_t l);
|
||||
};
|
||||
|
||||
// Factory: creates a new BinaryEncoder and returns it as an EncoderPtr.
EncoderPtr binaryEncoder() {
    auto encoder = make_shared<BinaryEncoder>();
    return encoder;
}
|
||||
|
||||
// Rebinds the internal stream writer to `os`; subsequent encode calls write
// to that stream.
void BinaryEncoder::init(OutputStream &os) {
    out_.reset(os);
}
|
||||
|
||||
// Forwards to the stream writer's flush().
void BinaryEncoder::flush() {
    out_.flush();
}
|
||||
|
||||
/** Encodes a null value; nothing is written to the stream. */
void BinaryEncoder::encodeNull() {
    // Intentionally empty.
}
|
||||
|
||||
/** Writes a boolean as a single value: 1 for true, 0 for false. */
void BinaryEncoder::encodeBool(bool b) {
    if (b) {
        out_.write(1);
    } else {
        out_.write(0);
    }
}
|
||||
|
||||
/** Encodes a 32-bit integer by widening it and reusing the 64-bit path. */
void BinaryEncoder::encodeInt(int32_t i) {
    doEncodeLong(static_cast<int64_t>(i));
}
|
||||
|
||||
/** Encodes a 64-bit integer via doEncodeLong(). */
void BinaryEncoder::encodeLong(int64_t l) {
    doEncodeLong(l);
}
|
||||
|
||||
/** Writes the in-memory bytes of a float, in host byte order, to the stream. */
void BinaryEncoder::encodeFloat(float f) {
    out_.writeBytes(reinterpret_cast<const uint8_t *>(&f), sizeof(float));
}
|
||||
|
||||
/** Writes the in-memory bytes of a double, in host byte order, to the stream. */
void BinaryEncoder::encodeDouble(double d) {
    out_.writeBytes(reinterpret_cast<const uint8_t *>(&d), sizeof(double));
}
|
||||
|
||||
/** Writes the string's byte length followed by its raw bytes. */
void BinaryEncoder::encodeString(const std::string &s) {
    const size_t length = s.size();
    doEncodeLong(length);
    const auto *raw = reinterpret_cast<const uint8_t *>(s.c_str());
    out_.writeBytes(raw, length);
}
|
||||
|
||||
/**
 * Writes a length-prefixed byte sequence.
 * @param bytes pointer to the first byte to write
 * @param len   number of bytes to write; also emitted as the length prefix
 */
void BinaryEncoder::encodeBytes(const uint8_t *bytes, size_t len) {
    // Length prefix first, then the payload.
    doEncodeLong(len);
    out_.writeBytes(bytes, len);
}
|
||||
|
||||
/**
 * Writes exactly len raw bytes; unlike encodeBytes(), no length prefix is
 * emitted.
 */
void BinaryEncoder::encodeFixed(const uint8_t *bytes, size_t len) {
    out_.writeBytes(bytes, len);
}
|
||||
|
||||
/** Encodes an enum value as its index, using the integer encoding. */
void BinaryEncoder::encodeEnum(size_t e) {
    doEncodeLong(static_cast<int64_t>(e));
}
|
||||
|
||||
/** Marks the start of an array; nothing is written to the stream. */
void BinaryEncoder::arrayStart() {
    // Intentionally empty.
}
|
||||
|
||||
/** Terminates an array by writing an encoded zero. */
void BinaryEncoder::arrayEnd() {
    const int64_t terminator = 0;
    doEncodeLong(terminator);
}
|
||||
|
||||
/** Marks the start of a map; nothing is written to the stream. */
void BinaryEncoder::mapStart() {
    // Intentionally empty.
}
|
||||
|
||||
/** Terminates a map by writing an encoded zero. */
void BinaryEncoder::mapEnd() {
    const int64_t terminator = 0;
    doEncodeLong(terminator);
}
|
||||
|
||||
void BinaryEncoder::setItemCount(size_t count) {
|
||||
if (count == 0) {
|
||||
throw Exception("Count cannot be zero");
|
||||
}
|
||||
doEncodeLong(count);
|
||||
}
|
||||
|
||||
/** Marks the start of an array/map item; nothing is written to the stream. */
void BinaryEncoder::startItem() {
    // Intentionally empty.
}
|
||||
|
||||
/** Encodes the index of the selected union branch, using the integer encoding. */
void BinaryEncoder::encodeUnionIndex(size_t e) {
    doEncodeLong(static_cast<int64_t>(e));
}
|
||||
|
||||
int64_t BinaryEncoder::byteCount() const {
|
||||
return out_.byteCount();
|
||||
}
|
||||
|
||||
void BinaryEncoder::doEncodeLong(int64_t l) {
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
|
||||
std::array<uint8_t, 10> bytes;
|
||||
auto size = encodeInt64(l, bytes);
|
||||
out_.writeBytes(bytes.data(), size);
|
||||
}
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,604 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "Compiler.hh"
|
||||
|
||||
#include <sstream>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
#include "CustomAttributes.hh"
|
||||
#include "NodeConcepts.hh"
|
||||
#include "Schema.hh"
|
||||
#include "Stream.hh"
|
||||
#include "Types.hh"
|
||||
#include "ValidSchema.hh"
|
||||
|
||||
#include "json/JsonDom.hh"
|
||||
#include <boost/algorithm/string/replace.hpp>
|
||||
|
||||
using std::make_pair;
|
||||
using std::map;
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
namespace avro {
|
||||
using json::Array;
|
||||
using json::Entity;
|
||||
using json::EntityType;
|
||||
using json::Object;
|
||||
|
||||
using SymbolTable = map<Name, NodePtr>;
|
||||
|
||||
// #define DEBUG_VERBOSE
|
||||
|
||||
/**
 * Maps a primitive type name to a freshly allocated NodePrimitive.
 * @param t type name as it appears in the schema JSON
 * @return the primitive node, or an empty NodePtr if t is not one of the
 *         primitive names listed below
 */
static NodePtr makePrimitive(const string& t) {
    struct Primitive {
        const char* name;
        Type type;
    };
    static const Primitive kPrimitives[] = {
        {"null", AVRO_NULL},
        {"boolean", AVRO_BOOL},
        {"int", AVRO_INT},
        {"long", AVRO_LONG},
        {"float", AVRO_FLOAT},
        {"double", AVRO_DOUBLE},
        {"string", AVRO_STRING},
        {"bytes", AVRO_BYTES},
    };
    for (const Primitive& candidate : kPrimitives) {
        if (t == candidate.name) {
            return NodePtr(new NodePrimitive(candidate.type));
        }
    }
    return NodePtr();
}
|
||||
|
||||
static NodePtr makeNode(const json::Entity& e, SymbolTable& st, const string& ns);
|
||||
|
||||
template <typename T>
|
||||
concepts::SingleAttribute<T> asSingleAttribute(const T& t) {
|
||||
concepts::SingleAttribute<T> n;
|
||||
n.add(t);
|
||||
return n;
|
||||
}
|
||||
|
||||
/** Returns true if the name contains a '.', i.e. carries its own namespace. */
static bool isFullName(const string& s) {
    for (const char c : s) {
        if (c == '.') {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
/**
 * Builds a Name from a type reference: a dotted name is used as-is, otherwise
 * the supplied namespace is applied.
 */
static Name getName(const string& name, const string& ns) {
    if (isFullName(name)) {
        return Name(name);
    }
    return Name(name, ns);
}
|
||||
|
||||
/**
 * Resolves a type given by name: either a primitive, or a symbolic reference
 * to a named type already present in the symbol table.
 * @throws Exception if the name is neither primitive nor found in st
 */
static NodePtr makeNode(const string& t, SymbolTable& st, const string& ns) {
    if (NodePtr primitive = makePrimitive(t)) {
        return primitive;
    }

    Name fullName = getName(t, ns);
    auto known = st.find(fullName);
    if (known == st.end()) {
        throw Exception("Unknown type: {}", fullName);
    }
    return NodePtr(new NodeSymbolic(asSingleAttribute(fullName), known->second));
}
|
||||
|
||||
/** Returns "true" if the field is in the container */
|
||||
// e.g.: can be false for non-mandatory fields
|
||||
bool containsField(const Object& m, const string& fieldName) {
|
||||
auto it = m.find(fieldName);
|
||||
return (it != m.end());
|
||||
}
|
||||
|
||||
json::Object::const_iterator findField(const Entity& e, const Object& m, const string& fieldName);
|
||||
|
||||
template <typename T>
|
||||
void ensureType(const Entity& e, const string& name) {
|
||||
if (e.type() != json::type_traits<T>::type()) {
|
||||
throw Exception(
|
||||
"Json field \"{}\" is not a {}: {}", name, json::type_traits<T>::name(), e.toString());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the string value of a mandatory field.
 * Exceptions thrown by findField() (missing field) and ensureType()
 * (non-string field) propagate to the caller.
 */
string getStringField(const Entity& e, const Object& m, const string& fieldName) {
    const auto field = findField(e, m, fieldName);
    const Entity& value = field->second;
    ensureType<string>(value, fieldName);
    return value.stringValue();
}
|
||||
|
||||
const Array& getArrayField(const Entity& e, const Object& m, const string& fieldName);
|
||||
|
||||
int64_t getLongField(const Entity& e, const Object& m, const string& fieldName) {
|
||||
auto it = findField(e, m, fieldName);
|
||||
ensureType<int64_t>(it->second, fieldName);
|
||||
return it->second.longValue();
|
||||
}
|
||||
|
||||
// Unescape double quotes (") for de-serialization. This method complements the
|
||||
// method NodeImpl::escape() which is used for serialization.
|
||||
static void unescape(string& s) {
|
||||
boost::replace_all(s, "\\\"", "\"");
|
||||
}
|
||||
|
||||
/** Returns the mandatory "doc" string of m with escaped quotes unescaped. */
string getDocField(const Entity& e, const Object& m) {
    string text = getStringField(e, m, "doc");
    unescape(text);
    return text;
}
|
||||
|
||||
struct Field {
|
||||
const string name;
|
||||
const vector<string> aliases;
|
||||
const NodePtr schema;
|
||||
const GenericDatum defaultValue;
|
||||
const CustomAttributes customAttributes;
|
||||
|
||||
Field(string n, vector<string> a, NodePtr v, GenericDatum dv, const CustomAttributes& ca)
|
||||
: name(std::move(n)),
|
||||
aliases(std::move(a)),
|
||||
schema(std::move(v)),
|
||||
defaultValue(std::move(dv)),
|
||||
customAttributes(ca) {}
|
||||
};
|
||||
|
||||
static void assertType(const Entity& e, EntityType et) {
|
||||
if (e.type() != et) {
|
||||
throw Exception("Unexpected type for default value: Expected {}, but found {} in line {}",
|
||||
json::typeToString(et),
|
||||
json::typeToString(e.type()),
|
||||
e.line());
|
||||
}
|
||||
}
|
||||
|
||||
/** Copies the characters of s into a byte vector of the same length. */
static vector<uint8_t> toBin(const string& s) {
    return vector<uint8_t>(s.begin(), s.end());
}
|
||||
|
||||
/**
 * Converts the JSON representation of a default value into a GenericDatum
 * matching the given schema node.
 *
 * @param n  schema node the value must conform to; a symbolic node is first
 *           resolved through st
 * @param e  JSON entity holding the default value
 * @param st symbol table used to resolve symbolic nodes
 * @return the datum built from e
 * @throws Exception if the JSON type does not match the schema type, if a
 *         record default lacks a field, if a symbolic name cannot be resolved,
 *         or if the node type is not handled
 */
static GenericDatum makeGenericDatum(NodePtr n, const Entity& e, const SymbolTable& st) {
    Type t = n->type();
    EntityType dt = e.type();

    if (t == AVRO_SYMBOLIC) {
        // Guard the lookup: dereferencing end() would be undefined behavior.
        auto resolved = st.find(n->name());
        if (resolved == st.end()) {
            throw Exception("Unknown type: {}", n->name());
        }
        n = resolved->second;
        t = n->type();
    }
    switch (t) {
        case AVRO_STRING:
            assertType(e, json::EntityType::String);
            return GenericDatum(e.stringValue());
        case AVRO_BYTES:
            assertType(e, json::EntityType::String);
            return GenericDatum(toBin(e.bytesValue()));
        case AVRO_INT:
            assertType(e, json::EntityType::Long);
            return GenericDatum(static_cast<int32_t>(e.longValue()));
        case AVRO_LONG:
            assertType(e, json::EntityType::Long);
            return GenericDatum(e.longValue());
        case AVRO_FLOAT:
            // An integer literal is accepted as a floating point default.
            if (dt == json::EntityType::Long) {
                return GenericDatum(static_cast<float>(e.longValue()));
            }
            assertType(e, json::EntityType::Double);
            return GenericDatum(static_cast<float>(e.doubleValue()));
        case AVRO_DOUBLE:
            // An integer literal is accepted as a floating point default.
            if (dt == json::EntityType::Long) {
                return GenericDatum(static_cast<double>(e.longValue()));
            }
            assertType(e, json::EntityType::Double);
            return GenericDatum(e.doubleValue());
        case AVRO_BOOL:
            assertType(e, json::EntityType::Bool);
            return GenericDatum(e.boolValue());
        case AVRO_NULL:
            assertType(e, json::EntityType::Null);
            return GenericDatum();
        case AVRO_RECORD: {
            assertType(e, json::EntityType::Obj);
            GenericRecord result(n);
            const map<string, Entity>& v = e.objectValue();
            // Every field of the record must be present in the default object.
            for (size_t i = 0; i < n->leaves(); ++i) {
                auto it = v.find(n->nameAt(i));
                if (it == v.end()) {
                    throw Exception("No value found in default for {}", n->nameAt(i));
                }
                result.setFieldAt(i, makeGenericDatum(n->leafAt(i), it->second, st));
            }
            return GenericDatum(n, result);
        }
        case AVRO_ENUM:
            assertType(e, json::EntityType::String);
            return GenericDatum(n, GenericEnum(n, e.stringValue()));
        case AVRO_ARRAY: {
            assertType(e, json::EntityType::Arr);
            GenericArray result(n);
            const vector<Entity>& elements = e.arrayValue();
            for (const auto& element : elements) {
                result.value().push_back(makeGenericDatum(n->leafAt(0), element, st));
            }
            return GenericDatum(n, result);
        }
        case AVRO_MAP: {
            assertType(e, json::EntityType::Obj);
            GenericMap result(n);
            const map<string, Entity>& v = e.objectValue();
            // Leaf 1 is used as the schema of the map values.
            for (const auto& it : v) {
                result.value().push_back(
                    make_pair(it.first, makeGenericDatum(n->leafAt(1), it.second, st)));
            }
            return GenericDatum(n, result);
        }
        case AVRO_UNION: {
            // The default of a union is always interpreted against its first
            // branch.
            GenericUnion result(n);
            result.selectBranch(0);
            result.datum() = makeGenericDatum(n->leafAt(0), e, st);
            return GenericDatum(n, result);
        }
        case AVRO_FIXED:
            assertType(e, json::EntityType::String);
            return GenericDatum(n, GenericFixed(n, toBin(e.bytesValue())));
        default:
            throw Exception("Unknown type: {}", t);
    }
}
|
||||
|
||||
/**
 * Returns the set of member names that are never treated as custom
 * attributes. The set is built once, on first use.
 */
static const std::unordered_set<std::string>& getKnownFields() {
    static const std::unordered_set<std::string> reservedNames = {
        "name",
        "type",
        "aliases",
        "default",
        "doc",
        "size",
        "logicalType",
        "values",
        "precision",
        "scale",
        "namespace",
    };
    return reservedNames;
}
|
||||
|
||||
static void getCustomAttributes(const Object& m, CustomAttributes& customAttributes) {
|
||||
// Don't add known fields on primitive type and fixed type into custom
|
||||
// fields.
|
||||
const std::unordered_set<std::string>& kKnownFields = getKnownFields();
|
||||
for (const auto& entry : m) {
|
||||
if (kKnownFields.find(entry.first) == kKnownFields.end()) {
|
||||
customAttributes.addAttribute(entry.first, entry.second.stringValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses one element of a record's "fields" array into a Field.
 * @param e  JSON object describing the field
 * @param st symbol table, passed on to makeNode() and makeGenericDatum()
 * @param ns namespace used to resolve unqualified type names
 */
static Field makeField(const Entity& e, SymbolTable& st, const string& ns) {
    const Object& m = e.objectValue();
    string fieldName = getStringField(e, m, "name");

    // "aliases" is optional.
    vector<string> aliases;
    const string aliasesKey = "aliases";
    if (containsField(m, aliasesKey)) {
        for (const auto& alias : getArrayField(e, m, aliasesKey)) {
            aliases.emplace_back(alias.stringValue());
        }
    }

    // "type" is mandatory, "default" is optional.
    auto typeEntry = findField(e, m, "type");
    auto defaultEntry = m.find("default");
    NodePtr node = makeNode(typeEntry->second, st, ns);
    if (containsField(m, "doc")) {
        node->setDoc(getDocField(e, m));
    }
    GenericDatum defaultDatum = (defaultEntry == m.end())
        ? GenericDatum()
        : makeGenericDatum(node, defaultEntry->second, st);

    // Everything that is not a known field becomes a custom attribute.
    CustomAttributes customAttributes;
    getCustomAttributes(m, customAttributes);

    return Field(std::move(fieldName), std::move(aliases), node, defaultDatum, customAttributes);
}
|
||||
|
||||
// Extended makeRecordNode (with doc).
|
||||
static NodePtr makeRecordNode(const Entity& e,
|
||||
const Name& name,
|
||||
const string* doc,
|
||||
const Object& m,
|
||||
SymbolTable& st,
|
||||
const string& ns) {
|
||||
concepts::MultiAttribute<string> fieldNames;
|
||||
vector<vector<string>> fieldAliases;
|
||||
concepts::MultiAttribute<NodePtr> fieldValues;
|
||||
concepts::MultiAttribute<CustomAttributes> customAttributes;
|
||||
vector<GenericDatum> defaultValues;
|
||||
string fields = "fields";
|
||||
for (const auto& it : getArrayField(e, m, fields)) {
|
||||
Field f = makeField(it, st, ns);
|
||||
fieldNames.add(f.name);
|
||||
fieldAliases.push_back(f.aliases);
|
||||
fieldValues.add(f.schema);
|
||||
defaultValues.push_back(f.defaultValue);
|
||||
customAttributes.add(f.customAttributes);
|
||||
}
|
||||
|
||||
NodeRecord* node;
|
||||
if (doc == nullptr) {
|
||||
node = new NodeRecord(asSingleAttribute(name),
|
||||
fieldValues,
|
||||
fieldNames,
|
||||
fieldAliases,
|
||||
defaultValues,
|
||||
customAttributes);
|
||||
} else {
|
||||
node = new NodeRecord(asSingleAttribute(name),
|
||||
asSingleAttribute(*doc),
|
||||
fieldValues,
|
||||
fieldNames,
|
||||
fieldAliases,
|
||||
defaultValues,
|
||||
customAttributes);
|
||||
}
|
||||
return NodePtr(node);
|
||||
}
|
||||
|
||||
/**
 * Derives the LogicalType from the optional "logicalType" member of m.
 * @return LogicalType::NONE when the member is absent, unrecognized, or (for
 *         decimal) when precision/scale cannot be read or set
 * @throws Exception (via getStringField) if "logicalType" is present but is
 *         not a string
 */
static LogicalType makeLogicalType(const Entity& e, const Object& m) {
    if (!containsField(m, "logicalType")) {
        return LogicalType(LogicalType::NONE);
    }

    const std::string& typeField = getStringField(e, m, "logicalType");

    if (typeField == "decimal") {
        LogicalType decimalType(LogicalType::DECIMAL);
        try {
            // Precision probably won't go over 38 and scale beyond -77/+77
            decimalType.setPrecision(static_cast<int32_t>(getLongField(e, m, "precision")));
            if (containsField(m, "scale")) {
                decimalType.setScale(static_cast<int32_t>(getLongField(e, m, "scale")));
            }
        } catch (Exception&) {
            // If any part of the logical type is malformed, per the standard we
            // must ignore the whole attribute.
            return LogicalType(LogicalType::NONE);
        }
        return decimalType;
    }

    // Logical types that carry no extra parameters.
    struct Simple {
        const char* name;
        LogicalType::Type type;
    };
    static const Simple kSimpleTypes[] = {
        {"date", LogicalType::DATE},
        {"time-millis", LogicalType::TIME_MILLIS},
        {"time-micros", LogicalType::TIME_MICROS},
        {"timestamp-millis", LogicalType::TIMESTAMP_MILLIS},
        {"timestamp-micros", LogicalType::TIMESTAMP_MICROS},
        {"duration", LogicalType::DURATION},
        {"uuid", LogicalType::UUID},
    };
    for (const Simple& candidate : kSimpleTypes) {
        if (typeField == candidate.name) {
            return LogicalType(candidate.type);
        }
    }
    return LogicalType(LogicalType::NONE);
}
|
||||
|
||||
/**
 * Builds a NodeEnum from the mandatory "symbols" array of m and attaches the
 * optional "doc".
 * @throws Exception if any symbol is not a JSON string
 */
static NodePtr makeEnumNode(const Entity& e, const Name& name, const Object& m) {
    const string symbolsKey = "symbols";
    concepts::MultiAttribute<string> symbols;
    for (const auto& symbol : getArrayField(e, m, symbolsKey)) {
        const bool isString = (symbol.type() == json::EntityType::String);
        if (!isString) {
            throw Exception("Enum symbol not a string: {}", symbol.toString());
        }
        symbols.add(symbol.stringValue());
    }

    NodePtr enumNode(new NodeEnum(asSingleAttribute(name), symbols));
    if (containsField(m, "doc")) {
        enumNode->setDoc(getDocField(e, m));
    }
    return enumNode;
}
|
||||
|
||||
/**
 * Builds a NodeFixed from the mandatory "size" member of m and attaches the
 * optional "doc".
 * @throws Exception if the size is zero or negative
 */
static NodePtr makeFixedNode(const Entity& e, const Name& name, const Object& m) {
    const int64_t declaredSize = getLongField(e, m, "size");
    if (!(declaredSize > 0)) {
        throw Exception("Size for fixed is not positive: {}", e.toString());
    }

    const auto size = static_cast<size_t>(declaredSize);
    NodePtr fixedNode(new NodeFixed(asSingleAttribute(name), asSingleAttribute(size)));
    if (containsField(m, "doc")) {
        fixedNode->setDoc(getDocField(e, m));
    }
    return fixedNode;
}
|
||||
|
||||
/**
 * Builds a NodeArray whose element schema is compiled from the mandatory
 * "items" member of m, and attaches the optional "doc".
 */
static NodePtr makeArrayNode(const Entity& e, const Object& m, SymbolTable& st, const string& ns) {
    const auto items = findField(e, m, "items");
    NodePtr itemSchema = makeNode(items->second, st, ns);
    NodePtr arrayNode(new NodeArray(asSingleAttribute(itemSchema)));
    if (containsField(m, "doc")) {
        arrayNode->setDoc(getDocField(e, m));
    }
    return arrayNode;
}
|
||||
|
||||
/**
 * Builds a NodeMap whose value schema is compiled from the mandatory "values"
 * member of m, and attaches the optional "doc".
 */
static NodePtr makeMapNode(const Entity& e, const Object& m, SymbolTable& st, const string& ns) {
    const auto values = findField(e, m, "values");
    NodePtr valueSchema = makeNode(values->second, st, ns);
    NodePtr mapNode(new NodeMap(asSingleAttribute(valueSchema)));
    if (containsField(m, "doc")) {
        mapNode->setDoc(getDocField(e, m));
    }
    return mapNode;
}
|
||||
|
||||
/**
 * Computes the Name of a named type definition.
 *
 * A dotted "name" is used as-is. Otherwise the definition's own "namespace"
 * member is applied if present, else the enclosing namespace ns. Entries of
 * the optional "aliases" array are added to the result.
 *
 * @throws Exception if "namespace" is present but is not a string
 */
static Name getName(const Entity& e, const Object& m, const string& ns) {
    const string& declaredName = getStringField(e, m, "name");

    Name result;
    if (isFullName(declaredName)) {
        result = Name(declaredName);
    } else {
        auto nsEntry = m.find("namespace");
        if (nsEntry == m.end()) {
            result = Name(declaredName, ns);
        } else {
            if (nsEntry->second.type() != json::type_traits<string>::type()) {
                throw Exception("Json field \"namespace\" is not a string: {}",
                                nsEntry->second.toString());
            }
            result = Name(declaredName, nsEntry->second.stringValue());
        }
    }

    const std::string aliasesKey = "aliases";
    if (containsField(m, aliasesKey)) {
        for (const auto& alias : getArrayField(e, m, aliasesKey)) {
            result.addAlias(alias.stringValue());
        }
    }

    return result;
}
|
||||
|
||||
/**
 * Builds a schema node from a JSON object type definition.
 *
 * The "type" member selects the kind of node: record/error, enum, fixed,
 * array, map, or a primitive name. Named types (record, error, enum, fixed)
 * are entered into the symbol table under their computed name.
 *
 * @param e  entity being compiled, used for error messages
 * @param m  JSON object of the definition
 * @param st symbol table; updated with newly defined named types
 * @param ns enclosing namespace
 * @return the node, with its logical type set when "logicalType" is valid
 * @throws Exception if "type" does not name a supported type, plus anything
 *         thrown while building the specific node kind
 */
static NodePtr makeNode(const Entity& e, const Object& m, SymbolTable& st, const string& ns) {
    const string& type = getStringField(e, m, "type");
    NodePtr result;
    if (type == "record" || type == "error" || type == "enum" || type == "fixed") {
        Name nm = getName(e, m, ns);
        if (type == "record" || type == "error") {
            // Register an empty placeholder before compiling the fields, so
            // that symbol-table lookups made while building them can already
            // find this name.
            result = NodePtr(new NodeRecord());
            st[nm] = result;

            // "doc" is optional; a null pointer selects the doc-less record.
            string doc;
            const string* docPtr = nullptr;
            if (containsField(m, "doc")) {
                doc = getDocField(e, m);
                docPtr = &doc;
            }

            // Move the fully built record into the registered placeholder.
            NodePtr r = makeRecordNode(e, nm, docPtr, m, st, nm.ns());
            (std::dynamic_pointer_cast<NodeRecord>(r))
                ->swap(*std::dynamic_pointer_cast<NodeRecord>(result));
        } else {
            result = (type == "enum") ? makeEnumNode(e, nm, m) : makeFixedNode(e, nm, m);
            st[nm] = result;
        }
    } else if (type == "array") {
        result = makeArrayNode(e, m, st, ns);
    } else if (type == "map") {
        result = makeMapNode(e, m, st, ns);
    } else {
        result = makePrimitive(type);
    }

    if (result) {
        try {
            result->setLogicalType(makeLogicalType(e, m));
        } catch (Exception&) {
            // Per the standard we must ignore the logical type attribute if it
            // is malformed.
        }
        return result;
    }

    // Uses the same "{}" placeholder as every other Exception in this file;
    // the previous "%1%" placeholder left the entity out of the message.
    throw Exception("Unknown type definition: {}", e.toString());
}
|
||||
|
||||
/**
 * Builds a NodeUnion from a JSON array: each element is compiled, in order,
 * into one branch. The first parameter is unused.
 */
static NodePtr makeNode(const Entity&, const Array& m, SymbolTable& st, const string& ns) {
    concepts::MultiAttribute<NodePtr> branches;
    for (const auto& branch : m) {
        NodePtr compiled = makeNode(branch, st, ns);
        branches.add(compiled);
    }
    return NodePtr(new NodeUnion(branches));
}
|
||||
|
||||
/**
 * Compiles any JSON entity into a schema node by dispatching on its JSON
 * type: a string is a type name, an object is a type definition and an array
 * is a union.
 * @throws Exception for any other JSON type
 */
static NodePtr makeNode(const json::Entity& e, SymbolTable& st, const string& ns) {
    const auto jsonType = e.type();
    if (jsonType == json::EntityType::String) {
        return makeNode(e.stringValue(), st, ns);
    }
    if (jsonType == json::EntityType::Obj) {
        return makeNode(e, e.objectValue(), st, ns);
    }
    if (jsonType == json::EntityType::Arr) {
        return makeNode(e, e.arrayValue(), st, ns);
    }
    throw Exception("Invalid Avro type: {}", e.toString());
}
|
||||
/**
 * Looks up a mandatory member of a JSON object.
 * @param e entity the object belongs to, used only in the error message
 * @return iterator to the member
 * @throws Exception if the member is missing
 */
json::Object::const_iterator findField(const Entity& e, const Object& m, const string& fieldName) {
    const auto position = m.find(fieldName);
    if (position != m.end()) {
        return position;
    }
    throw Exception("Missing Json field \"{}\": {}", fieldName, e.toString());
}
|
||||
/**
 * Returns the array value of a mandatory field.
 * Exceptions thrown by findField() (missing field) and ensureType()
 * (non-array field) propagate to the caller.
 */
const Array& getArrayField(const Entity& e, const Object& m, const string& fieldName) {
    const auto field = findField(e, m, fieldName);
    const Entity& value = field->second;
    ensureType<Array>(value, fieldName);
    return value.arrayValue();
}
|
||||
|
||||
/**
 * Parses JSON from the stream and compiles it into a ValidSchema, starting
 * with an empty symbol table and an empty namespace.
 */
ValidSchema compileJsonSchemaFromStream(InputStream& is) {
    json::Entity root = json::loadEntity(is);
    SymbolTable symbols;
    NodePtr rootNode = makeNode(root, symbols, "");
    return ValidSchema(rootNode);
}
|
||||
|
||||
/** Compiles the JSON schema read from the named file. */
AVRO_DECL ValidSchema compileJsonSchemaFromFile(const char* filename) {
    std::unique_ptr<InputStream> fileStream = fileInputStream(filename);
    return compileJsonSchemaFromStream(*fileStream);
}
|
||||
|
||||
/**
 * Compiles the JSON schema held in a memory buffer.
 * @param input pointer to the first byte of the JSON text
 * @param len   number of bytes to read
 */
AVRO_DECL ValidSchema compileJsonSchemaFromMemory(const uint8_t* input, size_t len) {
    std::unique_ptr<InputStream> memoryStream = memoryInputStream(input, len);
    return compileJsonSchemaFromStream(*memoryStream);
}
|
||||
|
||||
/** Compiles the JSON schema held in a NUL-terminated C string. */
AVRO_DECL ValidSchema compileJsonSchemaFromString(const char* input) {
    const auto* bytes = reinterpret_cast<const uint8_t*>(input);
    return compileJsonSchemaFromMemory(bytes, ::strlen(input));
}
|
||||
|
||||
/** Compiles the JSON schema held in a std::string (its full size is used). */
AVRO_DECL ValidSchema compileJsonSchemaFromString(const string& input) {
    const auto* bytes = reinterpret_cast<const uint8_t*>(input.data());
    return compileJsonSchemaFromMemory(bytes, input.size());
}
|
||||
|
||||
/** Wraps a std::istream in an InputStream and compiles the schema from it. */
static ValidSchema compile(std::istream& is) {
    std::unique_ptr<InputStream> wrapped = istreamInputStream(is);
    return compileJsonSchemaFromStream(*wrapped);
}
|
||||
|
||||
void compileJsonSchema(std::istream& is, ValidSchema& schema) {
|
||||
if (!is.good()) {
|
||||
throw Exception("Input stream is not good");
|
||||
}
|
||||
|
||||
schema = compile(is);
|
||||
}
|
||||
|
||||
/**
 * Non-throwing variant of compileJsonSchema() for avro::Exception failures.
 * @param error receives the exception message on failure; untouched on success
 * @return true on success, false if an avro::Exception was caught
 */
AVRO_DECL bool compileJsonSchema(std::istream& is, ValidSchema& schema, string& error) {
    try {
        compileJsonSchema(is, schema);
    } catch (const Exception& failure) {
        error = failure.what();
        return false;
    }
    return true;
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "CustomAttributes.hh"
|
||||
#include "Exception.hh"
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
namespace avro {
|
||||
|
||||
/**
 * Looks up a custom attribute by name.
 * @return the stored value, or an empty optional if no such attribute exists
 */
boost::optional<std::string> CustomAttributes::getAttribute(const std::string &name) const {
    const auto found = attributes_.find(name);
    if (found != attributes_.end()) {
        return boost::optional<std::string>(found->second);
    }
    return boost::optional<std::string>();
}
|
||||
|
||||
void CustomAttributes::addAttribute(const std::string &name,
|
||||
const std::string &value) {
|
||||
auto iter_and_find =
|
||||
attributes_.insert(std::pair<std::string, std::string>(name, value));
|
||||
if (!iter_and_find.second) {
|
||||
throw Exception(name + " already exists and cannot be added");
|
||||
}
|
||||
}
|
||||
|
||||
void CustomAttributes::printJson(std::ostream &os,
|
||||
const std::string &name) const {
|
||||
if (attributes().find(name) == attributes().end()) {
|
||||
throw Exception(name + " doesn't exist");
|
||||
}
|
||||
os << "\"" << name << "\": \"" << attributes().at(name) << "\"";
|
||||
}
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,566 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "DataFile.hh"
|
||||
#include "Compiler.hh"
|
||||
#include "Exception.hh"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include <boost/crc.hpp> // for boost::crc_32_type
|
||||
#include <boost/iostreams/device/file.hpp>
|
||||
#include <boost/iostreams/filter/gzip.hpp>
|
||||
#include <boost/iostreams/filter/zlib.hpp>
|
||||
#include <boost/random/mersenne_twister.hpp>
|
||||
|
||||
#ifdef SNAPPY_CODEC_AVAILABLE
|
||||
#include <snappy.h>
|
||||
#endif
|
||||
|
||||
namespace avro {
|
||||
using std::copy;
|
||||
using std::istringstream;
|
||||
using std::ostringstream;
|
||||
using std::string;
|
||||
using std::unique_ptr;
|
||||
using std::vector;
|
||||
|
||||
using std::array;
|
||||
|
||||
namespace {
|
||||
const string AVRO_SCHEMA_KEY("avro.schema");
|
||||
const string AVRO_CODEC_KEY("avro.codec");
|
||||
const string AVRO_NULL_CODEC("null");
|
||||
const string AVRO_DEFLATE_CODEC("deflate");
|
||||
|
||||
#ifdef SNAPPY_CODEC_AVAILABLE
|
||||
const string AVRO_SNAPPY_CODEC = "snappy";
|
||||
#endif
|
||||
|
||||
const size_t minSyncInterval = 32;
|
||||
const size_t maxSyncInterval = 1u << 30;
|
||||
|
||||
boost::iostreams::zlib_params get_zlib_params() {
|
||||
boost::iostreams::zlib_params ret;
|
||||
ret.method = boost::iostreams::zlib::deflated;
|
||||
ret.noheader = true;
|
||||
return ret;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
/**
 * Creates a writer that writes to the named file.
 * @param filename     path passed to fileOutputStream()
 * @param schema       schema stored in the file metadata
 * @param syncInterval buffered-byte threshold used by syncIfNeeded()
 * @param codec        codec applied to each block
 * @throws Exception from init() if syncInterval is out of range or the codec
 *         is unknown
 */
DataFileWriterBase::DataFileWriterBase(const char *filename,
                                       const ValidSchema &schema,
                                       size_t syncInterval,
                                       Codec codec)
    : filename_(filename),
      schema_(schema),
      encoderPtr_(binaryEncoder()),
      syncInterval_(syncInterval),
      codec_(codec),
      stream_(fileOutputStream(filename)),
      buffer_(memoryOutputStream()),
      sync_(makeSync()),
      objectCount_(0),
      lastSync_(0) {
    init(schema, syncInterval, codec);
}
|
||||
|
||||
/**
 * Creates a writer that writes to a caller-supplied output stream, taking
 * ownership of it. The filename is left empty.
 * @param outputStream destination stream
 * @param schema       schema stored in the file metadata
 * @param syncInterval buffered-byte threshold used by syncIfNeeded()
 * @param codec        codec applied to each block
 * @throws Exception from init() if syncInterval is out of range or the codec
 *         is unknown
 */
DataFileWriterBase::DataFileWriterBase(std::unique_ptr<OutputStream> outputStream,
                                       const ValidSchema &schema,
                                       size_t syncInterval,
                                       Codec codec)
    : filename_(),
      schema_(schema),
      encoderPtr_(binaryEncoder()),
      syncInterval_(syncInterval),
      codec_(codec),
      stream_(std::move(outputStream)),
      buffer_(memoryOutputStream()),
      sync_(makeSync()),
      objectCount_(0),
      lastSync_(0) {
    init(schema, syncInterval, codec);
}
|
||||
|
||||
void DataFileWriterBase::init(const ValidSchema &schema, size_t syncInterval, const Codec &codec) {
|
||||
if (syncInterval < minSyncInterval || syncInterval > maxSyncInterval) {
|
||||
throw Exception(
|
||||
"Invalid sync interval: {}. Should be between {} and {}",
|
||||
syncInterval, minSyncInterval, maxSyncInterval);
|
||||
}
|
||||
setMetadata(AVRO_CODEC_KEY, AVRO_NULL_CODEC);
|
||||
|
||||
if (codec_ == NULL_CODEC) {
|
||||
setMetadata(AVRO_CODEC_KEY, AVRO_NULL_CODEC);
|
||||
} else if (codec_ == DEFLATE_CODEC) {
|
||||
setMetadata(AVRO_CODEC_KEY, AVRO_DEFLATE_CODEC);
|
||||
#ifdef SNAPPY_CODEC_AVAILABLE
|
||||
} else if (codec_ == SNAPPY_CODEC) {
|
||||
setMetadata(AVRO_CODEC_KEY, AVRO_SNAPPY_CODEC);
|
||||
#endif
|
||||
} else {
|
||||
throw Exception("Unknown codec: {}", int(codec));
|
||||
}
|
||||
setMetadata(AVRO_SCHEMA_KEY, schema.toJson(false));
|
||||
|
||||
writeHeader();
|
||||
encoderPtr_->init(*buffer_);
|
||||
|
||||
lastSync_ = stream_->byteCount();
|
||||
}
|
||||
|
||||
/**
 * Closes the writer if it still owns a stream. Any exception from close() is
 * deliberately swallowed so that the destructor never throws.
 */
DataFileWriterBase::~DataFileWriterBase() {
    if (!stream_) {
        return;
    }
    try {
        close();
    } catch (...) {
        // Best effort only; nothing can be reported from a destructor.
    }
}
|
||||
|
||||
/** Flushes pending data and releases the output stream. */
void DataFileWriterBase::close() {
    flush();
    // Dropping the stream also tells the destructor there is nothing left to
    // close.
    stream_.reset();
}
|
||||
|
||||
/// Emits the records buffered so far as one block on the output stream:
/// object count, payload byte count, the payload (raw, zlib-compressed or
/// snappy-compressed depending on codec_) and finally the sync marker.
/// Afterwards a fresh in-memory buffer is installed and the object count
/// is reset to zero.
void DataFileWriterBase::sync() {
    // Push whatever the encoder still holds into the in-memory block buffer.
    encoderPtr_->flush();

    // From here on the encoder writes directly to the output stream.
    encoderPtr_->init(*stream_);
    avro::encode(*encoderPtr_, objectCount_);
    if (codec_ == NULL_CODEC) {
        // Uncompressed: the block size is simply the buffer's byte count.
        int64_t byteCount = buffer_->byteCount();
        avro::encode(*encoderPtr_, byteCount);
        // The size prefix must reach the stream before the raw payload copy.
        encoderPtr_->flush();
        std::unique_ptr<InputStream> in = memoryInputStream(*buffer_);
        copy(*in, *stream_);
    } else if (codec_ == DEFLATE_CODEC) {
        std::vector<char> buf;
        {
            // Pipe the buffered bytes through a zlib compressor into buf.
            boost::iostreams::filtering_ostream os;
            os.push(boost::iostreams::zlib_compressor(get_zlib_params()));
            os.push(boost::iostreams::back_inserter(buf));
            const uint8_t *data;
            size_t len;

            std::unique_ptr<InputStream> input = memoryInputStream(*buffer_);
            while (input->next(&data, &len)) {
                boost::iostreams::write(os, reinterpret_cast<const char *>(data), len);
            }
        } // make sure all is flushed
        std::unique_ptr<InputStream> in = memoryInputStream(
            reinterpret_cast<const uint8_t *>(buf.data()), buf.size());
        // The recorded size is the compressed size.
        int64_t byteCount = buf.size();
        avro::encode(*encoderPtr_, byteCount);
        encoderPtr_->flush();
        copy(*in, *stream_);
#ifdef SNAPPY_CODEC_AVAILABLE
    } else if (codec_ == SNAPPY_CODEC) {
        std::vector<char> temp;
        std::string compressed;
        boost::crc_32_type crc;
        {
            // Gather the buffered bytes into one contiguous vector.
            boost::iostreams::filtering_ostream os;
            os.push(boost::iostreams::back_inserter(temp));
            const uint8_t *data;
            size_t len;

            std::unique_ptr<InputStream> input = memoryInputStream(*buffer_);
            while (input->next(&data, &len)) {
                boost::iostreams::write(os, reinterpret_cast<const char *>(data),
                                        len);
            }
        } // make sure all is flushed

        // The CRC is computed over the uncompressed bytes.
        crc.process_bytes(reinterpret_cast<const char *>(temp.data()),
                          temp.size());
        // For Snappy, add the CRC32 checksum
        int32_t checksum = crc();

        // Now compress
        size_t compressed_size = snappy::Compress(
            reinterpret_cast<const char *>(temp.data()), temp.size(),
            &compressed);
        // temp is reused to hold the compressed bytes followed by the checksum.
        temp.clear();
        {
            boost::iostreams::filtering_ostream os;
            os.push(boost::iostreams::back_inserter(temp));
            boost::iostreams::write(os, compressed.c_str(), compressed_size);
        }
        // Append the checksum most-significant byte first.
        temp.push_back(static_cast<char>((checksum >> 24) & 0xFF));
        temp.push_back(static_cast<char>((checksum >> 16) & 0xFF));
        temp.push_back(static_cast<char>((checksum >> 8) & 0xFF));
        temp.push_back(static_cast<char>(checksum & 0xFF));
        std::unique_ptr<InputStream> in = memoryInputStream(
            reinterpret_cast<const uint8_t *>(temp.data()), temp.size());
        // The recorded size covers the compressed bytes plus the 4 checksum bytes.
        int64_t byteCount = temp.size();
        avro::encode(*encoderPtr_, byteCount);
        encoderPtr_->flush();
        copy(*in, *stream_);
#endif
    }

    // Terminate the block with the file's sync marker.
    encoderPtr_->init(*stream_);
    avro::encode(*encoderPtr_, sync_);
    encoderPtr_->flush();

    // Stream position right after the marker; reported by getCurrentBlockStart().
    lastSync_ = stream_->byteCount();

    // Start a new, empty block and point the encoder back at it.
    buffer_ = memoryOutputStream();
    encoderPtr_->init(*buffer_);
    objectCount_ = 0;
}
|
||||
|
||||
void DataFileWriterBase::syncIfNeeded() {
|
||||
encoderPtr_->flush();
|
||||
if (buffer_->byteCount() >= syncInterval_) {
|
||||
sync();
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns lastSync_, i.e. the output stream's byte count as recorded right
/// after the most recent sync marker was written.
uint64_t DataFileWriterBase::getCurrentBlockStart() const {
    return lastSync_;
}
|
||||
|
||||
/// Writes the current block out unconditionally; simply forwards to sync().
void DataFileWriterBase::flush() {
    sync();
}
|
||||
|
||||
/// Builds a pseudo-random sync marker by filling a DataFileSync from a
/// Mersenne Twister generator.
DataFileSync DataFileWriterBase::makeSync() {
    // NOTE(review): the seed is the current time in seconds, so two markers
    // generated within the same second are identical — confirm this is
    // acceptable for callers that create several writers at once.
    boost::mt19937 random(static_cast<uint32_t>(time(nullptr)));
    DataFileSync sync;
    // Each generated 32-bit value is narrowed to one marker element.
    std::generate(sync.begin(), sync.end(), random);
    return sync;
}
|
||||
|
||||
// Four-byte signature written at the start of a data file by writeHeader()
// and checked by readHeader().
typedef array<uint8_t, 4> Magic;
// The signature bytes: 'O', 'b', 'j', 0x01.
static Magic magic = {{'O', 'b', 'j', '\x01'}};
|
||||
|
||||
/// Writes the file header directly to the output stream: the magic bytes,
/// the metadata map and the sync marker, in that order.
void DataFileWriterBase::writeHeader() {
    // The header bypasses the block buffer and goes straight to stream_.
    encoderPtr_->init(*stream_);
    avro::encode(*encoderPtr_, magic);
    avro::encode(*encoderPtr_, metadata_);
    avro::encode(*encoderPtr_, sync_);
    encoderPtr_->flush();
}
|
||||
|
||||
/// Stores `value` under `key` in the metadata map as a byte vector,
/// replacing any value previously stored for that key.
void DataFileWriterBase::setMetadata(const string &key, const string &value) {
    // Metadata values are kept as raw bytes, so copy the string's characters.
    metadata_[key] = vector<uint8_t>(value.begin(), value.end());
}
|
||||
|
||||
/// Opens `filename` as a seekable input stream and immediately reads the
/// file header. The block offsets start out as -1 and the codec as
/// NULL_CODEC until readHeader() updates them.
DataFileReaderBase::DataFileReaderBase(const char *filename) : filename_(filename), stream_(fileSeekableInputStream(filename)),
                                                               decoder_(binaryDecoder()), objectCount_(0), eof_(false),
                                                               codec_(NULL_CODEC), blockStart_(-1), blockEnd_(-1) {
    readHeader();
}
|
||||
|
||||
/// Takes ownership of `inputStream` and immediately reads the file header.
/// filename_ stays empty for readers constructed this way.
///
/// blockStart_ and blockEnd_ are initialised to -1 exactly as in the
/// filename-based constructor, so both constructors leave the reader in the
/// same state before the first data block has been read.
DataFileReaderBase::DataFileReaderBase(std::unique_ptr<InputStream> inputStream) : stream_(std::move(inputStream)),
                                                                                   decoder_(binaryDecoder()), objectCount_(0), eof_(false),
                                                                                   codec_(NULL_CODEC), blockStart_(-1), blockEnd_(-1) {
    readHeader();
}
|
||||
|
||||
/// Prepares the reader to decode with the schema found in the file: the
/// reader schema is set to the data schema, a plain binary decoder is
/// installed and the first data block is loaded.
void DataFileReaderBase::init() {
    readerSchema_ = dataSchema_;
    dataDecoder_ = binaryDecoder();
    readDataBlock();
}
|
||||
|
||||
/// Prepares the reader to decode with the caller's `readerSchema`, then
/// loads the first data block. A resolving decoder is used only when the
/// JSON form of the reader schema differs from that of the data schema.
void DataFileReaderBase::init(const ValidSchema &readerSchema) {
    readerSchema_ = readerSchema;
    const bool schemasDiffer = readerSchema_.toJson(true) != dataSchema_.toJson(true);
    if (schemasDiffer) {
        dataDecoder_ = resolvingDecoder(dataSchema_, readerSchema_, binaryDecoder());
    } else {
        dataDecoder_ = binaryDecoder();
    }
    readDataBlock();
}
|
||||
|
||||
/// Consumes and discards everything that remains in `in`.
static void drain(InputStream &in) {
    const uint8_t *chunk = nullptr;
    size_t chunkLen = 0;
    for (;;) {
        if (!in.next(&chunk, &chunkLen)) {
            break;
        }
    }
}
|
||||
|
||||
/// Maps a value in [0, 15] to its lowercase hexadecimal digit character.
/// Values below 10 map to '0'..'9'; larger values are offset from 'a'.
char hex(unsigned int x) {
    if (x < 10) {
        return static_cast<char>(x + '0');
    }
    return static_cast<char>(x + ('a' - 10));
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const DataFileSync &s) {
|
||||
for (uint8_t i : s) {
|
||||
os << hex(i / 16) << hex(i % 16) << ' ';
|
||||
}
|
||||
os << std::endl;
|
||||
return os;
|
||||
}
|
||||
|
||||
bool DataFileReaderBase::hasMore() {
|
||||
for (;;) {
|
||||
if (eof_) {
|
||||
return false;
|
||||
} else if (objectCount_ != 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
dataDecoder_->init(*dataStream_);
|
||||
drain(*dataStream_);
|
||||
DataFileSync s;
|
||||
decoder_->init(*stream_);
|
||||
avro::decode(*decoder_, s);
|
||||
if (s != sync_) {
|
||||
throw Exception("Sync mismatch");
|
||||
}
|
||||
readDataBlock();
|
||||
}
|
||||
}
|
||||
|
||||
class BoundedInputStream : public InputStream {
|
||||
InputStream &in_;
|
||||
size_t limit_;
|
||||
|
||||
bool next(const uint8_t **data, size_t *len) final {
|
||||
if (limit_ != 0 && in_.next(data, len)) {
|
||||
if (*len > limit_) {
|
||||
in_.backup(*len - limit_);
|
||||
*len = limit_;
|
||||
}
|
||||
limit_ -= *len;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void backup(size_t len) final {
|
||||
in_.backup(len);
|
||||
limit_ += len;
|
||||
}
|
||||
|
||||
void skip(size_t len) final {
|
||||
if (len > limit_) {
|
||||
len = limit_;
|
||||
}
|
||||
in_.skip(len);
|
||||
limit_ -= len;
|
||||
}
|
||||
|
||||
size_t byteCount() const final {
|
||||
return in_.byteCount();
|
||||
}
|
||||
|
||||
public:
|
||||
BoundedInputStream(InputStream &in, size_t limit) : in_(in), limit_(limit) {}
|
||||
};
|
||||
|
||||
/// Creates a BoundedInputStream over `in` that exposes at most `limit` bytes.
/// The result only references `in`.
unique_ptr<InputStream> boundedInputStream(InputStream &in, size_t limit) {
    return std::make_unique<BoundedInputStream>(in, limit);
}
|
||||
|
||||
void DataFileReaderBase::readDataBlock() {
|
||||
decoder_->init(*stream_);
|
||||
blockStart_ = stream_->byteCount();
|
||||
const uint8_t *p = nullptr;
|
||||
size_t n = 0;
|
||||
if (!stream_->next(&p, &n)) {
|
||||
eof_ = true;
|
||||
return;
|
||||
}
|
||||
stream_->backup(n);
|
||||
avro::decode(*decoder_, objectCount_);
|
||||
int64_t byteCount;
|
||||
avro::decode(*decoder_, byteCount);
|
||||
decoder_->init(*stream_);
|
||||
blockEnd_ = stream_->byteCount() + byteCount;
|
||||
|
||||
unique_ptr<InputStream> st = boundedInputStream(*stream_, static_cast<size_t>(byteCount));
|
||||
if (codec_ == NULL_CODEC) {
|
||||
dataDecoder_->init(*st);
|
||||
dataStream_ = std::move(st);
|
||||
#ifdef SNAPPY_CODEC_AVAILABLE
|
||||
} else if (codec_ == SNAPPY_CODEC) {
|
||||
boost::crc_32_type crc;
|
||||
uint32_t checksum = 0;
|
||||
compressed_.clear();
|
||||
uncompressed.clear();
|
||||
const uint8_t *data;
|
||||
size_t len;
|
||||
while (st->next(&data, &len)) {
|
||||
compressed_.insert(compressed_.end(), data, data + len);
|
||||
}
|
||||
len = compressed_.size();
|
||||
if (len < 4)
|
||||
throw Exception("Cannot read compressed data, expected at least 4 bytes, got " + std::to_string(len));
|
||||
|
||||
int b1 = compressed_[len - 4] & 0xFF;
|
||||
int b2 = compressed_[len - 3] & 0xFF;
|
||||
int b3 = compressed_[len - 2] & 0xFF;
|
||||
int b4 = compressed_[len - 1] & 0xFF;
|
||||
|
||||
checksum = (b1 << 24) + (b2 << 16) + (b3 << 8) + (b4);
|
||||
if (!snappy::Uncompress(reinterpret_cast<const char *>(compressed_.data()),
|
||||
len - 4, &uncompressed)) {
|
||||
throw Exception(
|
||||
"Snappy Compression reported an error when decompressing");
|
||||
}
|
||||
crc.process_bytes(uncompressed.c_str(), uncompressed.size());
|
||||
uint32_t c = crc();
|
||||
if (checksum != c) {
|
||||
throw Exception(
|
||||
"Checksum did not match for Snappy compression: Expected: {}, computed: {}",
|
||||
checksum, c);
|
||||
}
|
||||
os_.reset(new boost::iostreams::filtering_istream());
|
||||
os_->push(
|
||||
boost::iostreams::basic_array_source<char>(uncompressed.c_str(),
|
||||
uncompressed.size()));
|
||||
std::unique_ptr<InputStream> in = istreamInputStream(*os_);
|
||||
|
||||
dataDecoder_->init(*in);
|
||||
dataStream_ = std::move(in);
|
||||
#endif
|
||||
} else {
|
||||
compressed_.clear();
|
||||
const uint8_t *data;
|
||||
size_t len;
|
||||
while (st->next(&data, &len)) {
|
||||
compressed_.insert(compressed_.end(), data, data + len);
|
||||
}
|
||||
os_.reset(new boost::iostreams::filtering_istream());
|
||||
os_->push(boost::iostreams::zlib_decompressor(get_zlib_params()));
|
||||
os_->push(boost::iostreams::basic_array_source<char>(
|
||||
compressed_.data(), compressed_.size()));
|
||||
|
||||
std::unique_ptr<InputStream> in = nonSeekableIstreamInputStream(*os_);
|
||||
dataDecoder_->init(*in);
|
||||
dataStream_ = std::move(in);
|
||||
}
|
||||
}
|
||||
|
||||
/// Intentionally empty: this function performs no work.
void DataFileReaderBase::close() {
}
|
||||
|
||||
/// Converts a byte vector into a string holding the same bytes, one
/// character per element.
static string toString(const vector<uint8_t> &v) {
    return string(v.begin(), v.end());
}
|
||||
|
||||
/// Compiles the JSON schema text stored in `v` (as raw bytes) into a
/// ValidSchema.
static ValidSchema makeSchema(const vector<uint8_t> &v) {
    istringstream iss(toString(v));
    ValidSchema vs;
    compileJsonSchema(iss, vs);
    return vs;
}
|
||||
|
||||
/// Reads and validates the file header: magic bytes, metadata map and sync
/// marker. Derives the data schema and the codec from the metadata and
/// records where the first data block starts.
///
/// Throws Exception when the magic does not match, when the metadata has no
/// schema entry, or when the codec named in the metadata is not recognised.
void DataFileReaderBase::readHeader() {
    decoder_->init(*stream_);
    Magic m;
    avro::decode(*decoder_, m);
    if (magic != m) {
        throw Exception("Invalid data file. Magic does not match: "
                        + filename_);
    }
    avro::decode(*decoder_, metadata_);
    Metadata::const_iterator it = metadata_.find(AVRO_SCHEMA_KEY);
    if (it == metadata_.end()) {
        throw Exception("No schema in metadata");
    }

    dataSchema_ = makeSchema(it->second);
    // Default the reader schema to the data schema when none has been set.
    if (!readerSchema_.root()) {
        readerSchema_ = dataSchema();
    }

    it = metadata_.find(AVRO_CODEC_KEY);
    if (it != metadata_.end() && toString(it->second) == AVRO_DEFLATE_CODEC) {
        codec_ = DEFLATE_CODEC;
#ifdef SNAPPY_CODEC_AVAILABLE
    } else if (it != metadata_.end()
               && toString(it->second) == AVRO_SNAPPY_CODEC) {
        codec_ = SNAPPY_CODEC;
#endif
    } else {
        // A missing codec entry means "no compression"; any other name than
        // the null codec is rejected.
        codec_ = NULL_CODEC;
        if (it != metadata_.end() && toString(it->second) != AVRO_NULL_CODEC) {
            throw Exception("Unknown codec in data file: " + toString(it->second));
        }
    }

    avro::decode(*decoder_, sync_);
    // Re-initialise the decoder before sampling the stream position that
    // marks the start of the first block.
    decoder_->init(*stream_);
    blockStart_ = stream_->byteCount();
}
|
||||
|
||||
void DataFileReaderBase::doSeek(int64_t position) {
|
||||
if (auto *ss = dynamic_cast<SeekableInputStream *>(stream_.get())) {
|
||||
if (!eof_) {
|
||||
dataDecoder_->init(*dataStream_);
|
||||
drain(*dataStream_);
|
||||
}
|
||||
decoder_->init(*stream_);
|
||||
ss->seek(position);
|
||||
eof_ = false;
|
||||
} else {
|
||||
throw Exception("seek not supported on non-SeekableInputStream");
|
||||
}
|
||||
}
|
||||
|
||||
/// Seeks to the absolute byte offset `position` and loads the data block
/// found there. The offset is used as given; no sync marker search is done.
void DataFileReaderBase::seek(int64_t position) {
    doSeek(position);
    readDataBlock();
}
|
||||
|
||||
/// Seeks to `position`, scans forward byte by byte until the file's sync
/// marker is found, and loads the data block that follows it. Sets eof_ and
/// returns when the stream ends before a marker is found.
void DataFileReaderBase::sync(int64_t position) {
    doSeek(position);
    DataFileSync sync_buffer;
    // p/n track the unread part of the chunk most recently obtained from
    // the stream.
    const uint8_t *p = nullptr;
    size_t n = 0;
    size_t i = 0;
    // Fill the window with the first SyncSize bytes.
    while (i < SyncSize) {
        if (n == 0 && !stream_->next(&p, &n)) {
            eof_ = true;
            return;
        }
        size_t len = std::min(SyncSize - i, n);
        memcpy(&sync_buffer[i], p, len);
        p += len;
        n -= len;
        i += len;
    }
    for (;;) {
        // sync_buffer is used as a ring: the oldest byte of the window sits
        // at index i % SyncSize.
        size_t j = 0;
        for (; j < SyncSize; ++j) {
            if (sync_[j] != sync_buffer[(i + j) % SyncSize]) {
                break;
            }
        }
        if (j == SyncSize) {
            // Found the sync marker!
            break;
        }
        if (n == 0 && !stream_->next(&p, &n)) {
            eof_ = true;
            return;
        }
        // Slide the window by one byte, overwriting the oldest entry.
        sync_buffer[i++ % SyncSize] = *p++;
        --n;
    }
    // Give back the bytes of the current chunk that lie after the marker.
    stream_->backup(n);
    readDataBlock();
}
|
||||
|
||||
/// Returns true when there is nothing more to read, or when the current
/// block starts at or beyond `position + SyncSize`. Calls hasMore(), so it
/// may advance the reader to the next block.
bool DataFileReaderBase::pastSync(int64_t position) {
    return !hasMore() || blockStart_ >= position + SyncSize;
}
|
||||
|
||||
/// Returns blockStart_, the stream offset recorded at the start of the
/// current data block.
int64_t DataFileReaderBase::previousSync() const {
    return blockStart_;
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,375 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "Stream.hh"
|
||||
#include <fstream>
|
||||
#ifndef _WIN32
|
||||
#include "fcntl.h"
|
||||
#include "unistd.h"
|
||||
#include <cerrno>
|
||||
|
||||
#ifndef O_BINARY
|
||||
#define O_BINARY 0
|
||||
#endif
|
||||
#else
|
||||
#include "Windows.h"
|
||||
|
||||
#ifdef min
|
||||
#undef min
|
||||
#endif
|
||||
#endif
|
||||
|
||||
using std::istream;
|
||||
using std::ostream;
|
||||
using std::unique_ptr;
|
||||
|
||||
namespace avro {
|
||||
namespace {
|
||||
/// Abstract byte source used by BufferCopyInInputStream to refill its
/// buffer.
struct BufferCopyIn {
    virtual ~BufferCopyIn() = default;
    /// Moves the read position by `len` bytes relative to the current one.
    virtual void seek(size_t len) = 0;
    /// Reads up to `toRead` bytes into `b` and stores the number obtained in
    /// `actual`. Returns false when no data could be delivered.
    virtual bool read(uint8_t *b, size_t toRead, size_t &actual) = 0;
};
|
||||
|
||||
struct FileBufferCopyIn : public BufferCopyIn {
|
||||
#ifdef _WIN32
|
||||
HANDLE h_;
|
||||
explicit FileBufferCopyIn(const char *filename) : h_(::CreateFileA(filename, GENERIC_READ, 0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) {
|
||||
if (h_ == INVALID_HANDLE_VALUE) {
|
||||
throw Exception("Cannot open file: {}", ::GetLastError());
|
||||
}
|
||||
}
|
||||
|
||||
~FileBufferCopyIn() {
|
||||
::CloseHandle(h_);
|
||||
}
|
||||
|
||||
void seek(size_t len) override {
|
||||
if (::SetFilePointer(h_, len, NULL, FILE_CURRENT) == INVALID_SET_FILE_POINTER && ::GetLastError() != NO_ERROR) {
|
||||
throw Exception("Cannot skip file: {}", ::GetLastError());
|
||||
}
|
||||
}
|
||||
|
||||
bool read(uint8_t *b, size_t toRead, size_t &actual) override {
|
||||
DWORD dw = 0;
|
||||
if (!::ReadFile(h_, b, toRead, &dw, NULL)) {
|
||||
throw Exception("Cannot read file: {}", ::GetLastError());
|
||||
}
|
||||
actual = static_cast<size_t>(dw);
|
||||
return actual != 0;
|
||||
}
|
||||
#else
|
||||
const int fd_;
|
||||
|
||||
explicit FileBufferCopyIn(const char *filename) : fd_(open(filename, O_RDONLY | O_BINARY)) {
|
||||
if (fd_ < 0) {
|
||||
throw Exception("Cannot open file: {}", strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
~FileBufferCopyIn() override {
|
||||
::close(fd_);
|
||||
}
|
||||
|
||||
void seek(size_t len) final {
|
||||
off_t r = ::lseek(fd_, len, SEEK_CUR);
|
||||
if (r == static_cast<off_t>(-1)) {
|
||||
throw Exception("Cannot skip file: {}", strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
bool read(uint8_t *b, size_t toRead, size_t &actual) final {
|
||||
auto n = ::read(fd_, b, toRead);
|
||||
if (n > 0) {
|
||||
actual = n;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
/// BufferCopyIn that reads from a std::istream. The stream is held by
/// reference and is not owned.
struct IStreamBufferCopyIn : public BufferCopyIn {
    istream &is_;

    explicit IStreamBufferCopyIn(istream &is) : is_(is) {
    }

    /// Moves the stream's get position by `len` relative to the current
    /// position. Throws Exception when seekg leaves the stream in a failed
    /// state.
    void seek(size_t len) override {
        if (!is_.seekg(len, std::ios_base::cur)) {
            throw Exception("Cannot skip stream");
        }
    }

    /// Reads up to `toRead` bytes into `b`. Returns false, without touching
    /// `actual`, when the stream is in the bad state. Otherwise `actual` is
    /// the number of bytes read and the result is false only at end of
    /// stream with zero bytes read.
    bool read(uint8_t *b, size_t toRead, size_t &actual) override {
        is_.read(reinterpret_cast<char *>(b), toRead);
        if (is_.bad()) {
            return false;
        }
        actual = static_cast<size_t>(is_.gcount());
        return (!is_.eof() || actual != 0);
    }
};
|
||||
|
||||
struct NonSeekableIStreamBufferCopyIn : public IStreamBufferCopyIn {
|
||||
explicit NonSeekableIStreamBufferCopyIn(istream &is) : IStreamBufferCopyIn(is) {}
|
||||
|
||||
void seek(size_t len) final {
|
||||
const size_t bufSize = 4096;
|
||||
uint8_t buf[bufSize];
|
||||
while (len > 0) {
|
||||
size_t n = std::min(len, bufSize);
|
||||
is_.read(reinterpret_cast<char *>(buf), n);
|
||||
if (is_.bad()) {
|
||||
throw Exception("Cannot skip stream");
|
||||
}
|
||||
auto actual = static_cast<size_t>(is_.gcount());
|
||||
if (is_.eof() && actual == 0) {
|
||||
throw Exception("Cannot skip stream");
|
||||
}
|
||||
len -= n;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/// SeekableInputStream that serves data from a fixed-size buffer which is
/// refilled from a BufferCopyIn source. The buffer is allocated in the
/// constructor and released in the destructor.
class BufferCopyInInputStream : public SeekableInputStream {
    const size_t bufferSize_;
    uint8_t *const buffer_;
    unique_ptr<BufferCopyIn> in_;
    // Number of bytes handed out to the caller so far.
    size_t byteCount_;
    // Start of the not-yet-handed-out bytes in buffer_.
    uint8_t *next_;
    // Number of not-yet-handed-out bytes starting at next_.
    size_t available_;

    /// Hands out all buffered bytes, refilling the buffer first when it is
    /// empty. Returns false when the source has no more data.
    bool next(const uint8_t **data, size_t *size) final {
        if (available_ == 0 && !fill()) {
            return false;
        }
        *data = next_;
        *size = available_;
        next_ += available_;
        byteCount_ += available_;
        available_ = 0;
        return true;
    }

    /// Returns the last `len` handed-out bytes to the buffer.
    void backup(size_t len) final {
        next_ -= len;
        available_ += len;
        byteCount_ -= len;
    }

    /// Skips `len` bytes: buffered bytes are consumed first, the remainder
    /// is skipped on the source itself.
    void skip(size_t len) final {
        while (len > 0) {
            if (available_ == 0) {
                in_->seek(len);
                byteCount_ += len;
                return;
            }
            size_t n = std::min(available_, len);
            available_ -= n;
            next_ += n;
            len -= n;
            byteCount_ += n;
        }
    }

    size_t byteCount() const final { return byteCount_; }

    /// Refills the buffer from the source. Returns false when the source
    /// delivered nothing.
    bool fill() {
        size_t n = 0;
        if (in_->read(buffer_, bufferSize_, n)) {
            next_ = buffer_;
            available_ = n;
            return true;
        }
        return false;
    }

    /// Moves to the absolute offset `position` and drops buffered data.
    void seek(int64_t position) final {
        // BufferCopyIn::seek is relative to byteCount_, whereas position is
        // absolute.
        // The source sits available_ bytes ahead of byteCount_, hence the
        // extra subtraction.
        in_->seek(position - byteCount_ - available_);
        byteCount_ = position;
        available_ = 0;
    }

public:
    BufferCopyInInputStream(unique_ptr<BufferCopyIn> in, size_t bufferSize) : bufferSize_(bufferSize),
                                                                              buffer_(new uint8_t[bufferSize]),
                                                                              in_(std::move(in)),
                                                                              byteCount_(0),
                                                                              next_(buffer_),
                                                                              available_(0) {}

    ~BufferCopyInInputStream() override {
        delete[] buffer_;
    }
};
|
||||
|
||||
namespace {
|
||||
/// Abstract byte sink used by BufferCopyOutputStream to drain its buffer.
struct BufferCopyOut {
    virtual ~BufferCopyOut() = default;
    /// Writes the `len` bytes starting at `b` to the sink.
    virtual void write(const uint8_t *b, size_t len) = 0;
};
|
||||
|
||||
struct FileBufferCopyOut : public BufferCopyOut {
|
||||
#ifdef _WIN32
|
||||
HANDLE h_;
|
||||
explicit FileBufferCopyOut(const char *filename) : h_(::CreateFileA(filename, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) {
|
||||
if (h_ == INVALID_HANDLE_VALUE) {
|
||||
throw Exception("Cannot open file: {}", ::GetLastError());
|
||||
}
|
||||
}
|
||||
|
||||
~FileBufferCopyOut() {
|
||||
::CloseHandle(h_);
|
||||
}
|
||||
|
||||
void write(const uint8_t *b, size_t len) override {
|
||||
while (len > 0) {
|
||||
DWORD dw = 0;
|
||||
if (!::WriteFile(h_, b, len, &dw, NULL)) {
|
||||
throw Exception("Cannot read file: {}", ::GetLastError());
|
||||
}
|
||||
b += dw;
|
||||
len -= dw;
|
||||
}
|
||||
}
|
||||
#else
|
||||
const int fd_;
|
||||
|
||||
explicit FileBufferCopyOut(const char *filename) : fd_(::open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644)) {
|
||||
|
||||
if (fd_ < 0) {
|
||||
throw Exception("Cannot open file: {}", ::strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
~FileBufferCopyOut() override {
|
||||
::close(fd_);
|
||||
}
|
||||
|
||||
void write(const uint8_t *b, size_t len) final {
|
||||
if (::write(fd_, b, len) < 0) {
|
||||
throw Exception("Cannot write file: {}", ::strerror(errno));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
/// BufferCopyOut that writes to a std::ostream. The stream is held by
/// reference and is not owned.
struct OStreamBufferCopyOut : public BufferCopyOut {
    ostream &os_;

    explicit OStreamBufferCopyOut(ostream &os) : os_(os) {
    }

    /// Writes `len` bytes from `b` to the stream. The stream state is not
    /// checked afterwards.
    void write(const uint8_t *b, size_t len) final {
        os_.write(reinterpret_cast<const char *>(b), len);
    }
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/// OutputStream that collects data in a fixed-size buffer and passes it to
/// a BufferCopyOut sink on flush(). The buffer is allocated in the
/// constructor and released in the destructor. The destructor does not
/// flush.
class BufferCopyOutputStream : public OutputStream {
    size_t bufferSize_;
    uint8_t *const buffer_;
    unique_ptr<BufferCopyOut> out_;
    // Start of the unused space in buffer_.
    uint8_t *next_;
    // Number of unused bytes starting at next_.
    size_t available_;
    size_t byteCount_;

    // Invariant: byteCount_ == bytesWritten + bufferSize_ - available_;
    /// Hands the whole unused part of the buffer to the caller, flushing
    /// first when the buffer is full. All of it is counted as written until
    /// the caller returns the unused tail through backup().
    bool next(uint8_t **data, size_t *len) final {
        if (available_ == 0) {
            flush();
        }
        *data = next_;
        *len = available_;
        next_ += available_;
        byteCount_ += available_;
        available_ = 0;
        return true;
    }

    /// Returns the last `len` handed-out bytes as unused space.
    void backup(size_t len) final {
        available_ += len;
        next_ -= len;
        byteCount_ -= len;
    }

    uint64_t byteCount() const final {
        return byteCount_;
    }

    /// Writes the used part of the buffer to the sink and marks the whole
    /// buffer as unused again.
    void flush() final {
        out_->write(buffer_, bufferSize_ - available_);
        next_ = buffer_;
        available_ = bufferSize_;
    }

public:
    BufferCopyOutputStream(unique_ptr<BufferCopyOut> out, size_t bufferSize) : bufferSize_(bufferSize),
                                                                               buffer_(new uint8_t[bufferSize]),
                                                                               out_(std::move(out)),
                                                                               next_(buffer_),
                                                                               available_(bufferSize_), byteCount_(0) {}

    ~BufferCopyOutputStream() override {
        delete[] buffer_;
    }
};
|
||||
|
||||
/// Opens `filename` for reading and wraps it in a buffered InputStream
/// using a buffer of `bufferSize` bytes.
unique_ptr<InputStream> fileInputStream(const char *filename,
                                        size_t bufferSize) {
    unique_ptr<BufferCopyIn> source = std::make_unique<FileBufferCopyIn>(filename);
    return std::make_unique<BufferCopyInInputStream>(std::move(source), bufferSize);
}
|
||||
|
||||
/// Opens `filename` for reading and wraps it in a buffered
/// SeekableInputStream using a buffer of `bufferSize` bytes.
unique_ptr<SeekableInputStream> fileSeekableInputStream(const char *filename,
                                                        size_t bufferSize) {
    unique_ptr<BufferCopyIn> source = std::make_unique<FileBufferCopyIn>(filename);
    return std::make_unique<BufferCopyInInputStream>(std::move(source), bufferSize);
}
|
||||
|
||||
/// Wraps `is` in a buffered InputStream using a buffer of `bufferSize`
/// bytes. The result only references `is`.
unique_ptr<InputStream> istreamInputStream(istream &is, size_t bufferSize) {
    unique_ptr<BufferCopyIn> source = std::make_unique<IStreamBufferCopyIn>(is);
    return std::make_unique<BufferCopyInInputStream>(std::move(source), bufferSize);
}
|
||||
|
||||
/// Like istreamInputStream, but skips by reading and discarding instead of
/// seeking on `is`. The result only references `is`.
unique_ptr<InputStream> nonSeekableIstreamInputStream(
    istream &is, size_t bufferSize) {
    unique_ptr<BufferCopyIn> source = std::make_unique<NonSeekableIStreamBufferCopyIn>(is);
    return std::make_unique<BufferCopyInInputStream>(std::move(source), bufferSize);
}
|
||||
|
||||
/// Opens `filename` for writing and wraps it in a buffered OutputStream
/// using a buffer of `bufferSize` bytes.
unique_ptr<OutputStream> fileOutputStream(const char *filename,
                                          size_t bufferSize) {
    unique_ptr<BufferCopyOut> sink = std::make_unique<FileBufferCopyOut>(filename);
    return std::make_unique<BufferCopyOutputStream>(std::move(sink), bufferSize);
}
|
||||
|
||||
/// Wraps `os` in a buffered OutputStream using a buffer of `bufferSize`
/// bytes. The result only references `os`.
unique_ptr<OutputStream> ostreamOutputStream(ostream &os,
                                             size_t bufferSize) {
    unique_ptr<BufferCopyOut> sink = std::make_unique<OStreamBufferCopyOut>(os);
    return std::make_unique<BufferCopyOutputStream>(std::move(sink), bufferSize);
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,228 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "Generic.hh"
|
||||
#include <utility>
|
||||
|
||||
namespace avro {
|
||||
|
||||
using std::ostringstream;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
typedef vector<uint8_t> bytes;
|
||||
|
||||
/// Throws Exception unless the type of `schema` equals `type`.
void GenericContainer::assertType(const NodePtr &schema, Type type) {
    if (schema->type() != type) {
        throw Exception("Schema type {} expected {}", schema->type(), type);
    }
}
|
||||
|
||||
/// Creates a reader for schema `s` on top of `decoder`. isResolving_ is set
/// according to whether `decoder` is a ResolvingDecoder at run time.
GenericReader::GenericReader(ValidSchema s, const DecoderPtr &decoder) : schema_(std::move(s)), isResolving_(dynamic_cast<ResolvingDecoder *>(&(*decoder)) != nullptr),
                                                                         decoder_(decoder) {
}
|
||||
|
||||
/// Creates a reader that yields data in `readerSchema` from input written
/// with `writerSchema`, by wrapping `decoder` in a resolving decoder.
GenericReader::GenericReader(const ValidSchema &writerSchema,
                             const ValidSchema &readerSchema, const DecoderPtr &decoder) : schema_(readerSchema),
                                                                                           isResolving_(true),
                                                                                           decoder_(resolvingDecoder(writerSchema, readerSchema, decoder)) {
}
|
||||
|
||||
/// Replaces `datum` with a fresh datum for this reader's schema root and
/// decodes one value into it using the reader's own decoder.
void GenericReader::read(GenericDatum &datum) const {
    datum = GenericDatum(schema_.root());
    read(datum, *decoder_, isResolving_);
}
|
||||
|
||||
/// Decodes one value from `d` into `datum`, recursing into records, arrays
/// and maps. `datum` must already carry the schema shape to decode.
///
/// When `isResolving` is true, `d` is treated as a ResolvingDecoder and
/// record fields are read in the order it reports.
///
/// Throws Exception for a datum type not handled by the switch.
void GenericReader::read(GenericDatum &datum, Decoder &d, bool isResolving) {
    // For a union, the branch index comes first and selects the type that
    // datum.type() reports below.
    if (datum.isUnion()) {
        datum.selectBranch(d.decodeUnionIndex());
    }
    switch (datum.type()) {
        case AVRO_NULL:
            d.decodeNull();
            break;
        case AVRO_BOOL:
            datum.value<bool>() = d.decodeBool();
            break;
        case AVRO_INT:
            datum.value<int32_t>() = d.decodeInt();
            break;
        case AVRO_LONG:
            datum.value<int64_t>() = d.decodeLong();
            break;
        case AVRO_FLOAT:
            datum.value<float>() = d.decodeFloat();
            break;
        case AVRO_DOUBLE:
            datum.value<double>() = d.decodeDouble();
            break;
        case AVRO_STRING:
            d.decodeString(datum.value<string>());
            break;
        case AVRO_BYTES:
            d.decodeBytes(datum.value<bytes>());
            break;
        case AVRO_FIXED: {
            auto &f = datum.value<GenericFixed>();
            d.decodeFixed(f.schema()->fixedSize(), f.value());
        } break;
        case AVRO_RECORD: {
            auto &r = datum.value<GenericRecord>();
            size_t c = r.schema()->leaves();
            if (isResolving) {
                // The resolving decoder dictates the order in which the
                // fields arrive.
                std::vector<size_t> fo =
                    static_cast<ResolvingDecoder &>(d).fieldOrder();
                for (size_t i = 0; i < c; ++i) {
                    read(r.fieldAt(fo[i]), d, isResolving);
                }
            } else {
                for (size_t i = 0; i < c; ++i) {
                    read(r.fieldAt(i), d, isResolving);
                }
            }
        } break;
        case AVRO_ENUM:
            datum.value<GenericEnum>().set(d.decodeEnum());
            break;
        case AVRO_ARRAY: {
            auto &v = datum.value<GenericArray>();
            vector<GenericDatum> &r = v.value();
            // Leaf 0 of the array schema is the element schema.
            const NodePtr &nn = v.schema()->leafAt(0);
            r.resize(0);
            size_t start = 0;
            // Items arrive in batches of m; a batch size of 0 ends the array.
            for (size_t m = d.arrayStart(); m != 0; m = d.arrayNext()) {
                r.resize(r.size() + m);
                for (; start < r.size(); ++start) {
                    r[start] = GenericDatum(nn);
                    read(r[start], d, isResolving);
                }
            }
        } break;
        case AVRO_MAP: {
            auto &v = datum.value<GenericMap>();
            GenericMap::Value &r = v.value();
            // Leaf 1 of the map schema is the value schema.
            const NodePtr &nn = v.schema()->leafAt(1);
            r.resize(0);
            size_t start = 0;
            // Entries arrive in batches of m; a batch size of 0 ends the map.
            for (size_t m = d.mapStart(); m != 0; m = d.mapNext()) {
                r.resize(r.size() + m);
                for (; start < r.size(); ++start) {
                    d.decodeString(r[start].first);
                    r[start].second = GenericDatum(nn);
                    read(r[start].second, d, isResolving);
                }
            }
        } break;
        default:
            throw Exception("Unknown schema type {}", datum.type());
    }
}
|
||||
|
||||
/// Replaces `g` with a fresh datum for schema `s` and decodes one value
/// from `d` into it.
void GenericReader::read(Decoder &d, GenericDatum &g, const ValidSchema &s) {
    g = GenericDatum(s);
    read(d, g);
}
|
||||
|
||||
/// Decodes one value from `d` into the already-shaped datum `g`. Resolving
/// mode is chosen by checking at run time whether `d` is a
/// ResolvingDecoder.
void GenericReader::read(Decoder &d, GenericDatum &g) {
    read(g, d, dynamic_cast<ResolvingDecoder *>(&d) != nullptr);
}
|
||||
|
||||
/// Creates a writer that stores schema `s` and encodes through `encoder`.
GenericWriter::GenericWriter(ValidSchema s, EncoderPtr encoder) : schema_(std::move(s)), encoder_(std::move(encoder)) {
}
|
||||
|
||||
/// Encodes `datum` using this writer's own encoder.
void GenericWriter::write(const GenericDatum &datum) const {
    write(datum, *encoder_);
}
|
||||
|
||||
/// Encodes `datum` to `e`, recursing into records, arrays and maps.
///
/// Throws Exception for a datum type not handled by the switch.
void GenericWriter::write(const GenericDatum &datum, Encoder &e) {
    // A union is written as its branch index followed by the branch value.
    if (datum.isUnion()) {
        e.encodeUnionIndex(datum.unionBranch());
    }
    switch (datum.type()) {
        case AVRO_NULL:
            e.encodeNull();
            break;
        case AVRO_BOOL:
            e.encodeBool(datum.value<bool>());
            break;
        case AVRO_INT:
            e.encodeInt(datum.value<int32_t>());
            break;
        case AVRO_LONG:
            e.encodeLong(datum.value<int64_t>());
            break;
        case AVRO_FLOAT:
            e.encodeFloat(datum.value<float>());
            break;
        case AVRO_DOUBLE:
            e.encodeDouble(datum.value<double>());
            break;
        case AVRO_STRING:
            e.encodeString(datum.value<string>());
            break;
        case AVRO_BYTES:
            e.encodeBytes(datum.value<bytes>());
            break;
        case AVRO_FIXED:
            e.encodeFixed(datum.value<GenericFixed>().value());
            break;
        case AVRO_RECORD: {
            // Fields are written in schema order.
            const auto &r = datum.value<GenericRecord>();
            size_t c = r.schema()->leaves();
            for (size_t i = 0; i < c; ++i) {
                write(r.fieldAt(i), e);
            }
        } break;
        case AVRO_ENUM:
            e.encodeEnum(datum.value<GenericEnum>().value());
            break;
        case AVRO_ARRAY: {
            const GenericArray::Value &r = datum.value<GenericArray>().value();
            e.arrayStart();
            // A non-empty array is written as a single batch of all items.
            if (!r.empty()) {
                e.setItemCount(r.size());
                for (const auto &it : r) {
                    e.startItem();
                    write(it, e);
                }
            }
            e.arrayEnd();
        } break;
        case AVRO_MAP: {
            const GenericMap::Value &r = datum.value<GenericMap>().value();
            e.mapStart();
            // A non-empty map is written as a single batch of key/value pairs.
            if (!r.empty()) {
                e.setItemCount(r.size());
                for (const auto &it : r) {
                    e.startItem();
                    e.encodeString(it.first);
                    write(it.second, e);
                }
            }
            e.mapEnd();
        } break;
        default:
            throw Exception("Unknown schema type {}", datum.type());
    }
}
|
||||
|
||||
/// Encodes `g` to `e`; same operation as write(g, e) with the arguments in
/// the opposite order.
void GenericWriter::write(Encoder &e, const GenericDatum &g) {
    write(g, e);
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "GenericDatum.hh"
|
||||
#include "NodeImpl.hh"
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// Builds a datum for the root node of \p schema: records the root's type and
/// logical type, then lets init() create the matching default value.
GenericDatum::GenericDatum(const ValidSchema &schema)
    : type_(schema.root()->type()),
      logicalType_(schema.root()->logicalType()) {
    const NodePtr &rootNode = schema.root();
    init(rootNode);
}
|
||||
|
||||
/// Builds a datum for the given schema node: records its type and logical
/// type, then lets init() create the matching default value.
GenericDatum::GenericDatum(const NodePtr &schema)
    : type_(schema->type()),
      logicalType_(schema->logicalType()) {
    init(schema);
}
|
||||
|
||||
void GenericDatum::init(const NodePtr &schema) {
|
||||
NodePtr sc = schema;
|
||||
if (type_ == AVRO_SYMBOLIC) {
|
||||
sc = resolveSymbol(schema);
|
||||
type_ = sc->type();
|
||||
logicalType_ = sc->logicalType();
|
||||
}
|
||||
switch (type_) {
|
||||
case AVRO_NULL: break;
|
||||
case AVRO_BOOL:
|
||||
value_ = bool();
|
||||
break;
|
||||
case AVRO_INT:
|
||||
value_ = int32_t();
|
||||
break;
|
||||
case AVRO_LONG:
|
||||
value_ = int64_t();
|
||||
break;
|
||||
case AVRO_FLOAT:
|
||||
value_ = float();
|
||||
break;
|
||||
case AVRO_DOUBLE:
|
||||
value_ = double();
|
||||
break;
|
||||
case AVRO_STRING:
|
||||
value_ = string();
|
||||
break;
|
||||
case AVRO_BYTES:
|
||||
value_ = vector<uint8_t>();
|
||||
break;
|
||||
case AVRO_FIXED:
|
||||
value_ = GenericFixed(sc);
|
||||
break;
|
||||
case AVRO_RECORD:
|
||||
value_ = GenericRecord(sc);
|
||||
break;
|
||||
case AVRO_ENUM:
|
||||
value_ = GenericEnum(sc);
|
||||
break;
|
||||
case AVRO_ARRAY:
|
||||
value_ = GenericArray(sc);
|
||||
break;
|
||||
case AVRO_MAP:
|
||||
value_ = GenericMap(sc);
|
||||
break;
|
||||
case AVRO_UNION:
|
||||
value_ = GenericUnion(sc);
|
||||
break;
|
||||
default:
|
||||
throw Exception("Unknown schema type {}", toString(type_));
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a record with one default-initialised GenericDatum per leaf of
/// \p schema, in leaf order.
GenericRecord::GenericRecord(const NodePtr &schema) : GenericContainer(AVRO_RECORD, schema) {
    const size_t fieldTotal = schema->leaves();
    fields_.resize(fieldTotal);
    for (size_t idx = 0; idx != fieldTotal; ++idx) {
        fields_[idx] = GenericDatum(schema->leafAt(idx));
    }
}
|
||||
|
||||
/// Creates a fixed value for \p schema holding a copy of the bytes in \p v.
GenericFixed::GenericFixed(const NodePtr &schema, const vector<uint8_t> &v)
    : GenericContainer(AVRO_FIXED, schema),
      value_(v) {
}
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "LogicalType.hh"
|
||||
#include "Exception.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// Creates a logical type of the given kind with precision and scale set to 0.
LogicalType::LogicalType(Type type)
    : type_(type),
      precision_(0),
      scale_(0) {
}
|
||||
|
||||
/// Returns the kind of logical type stored in this object.
LogicalType::Type LogicalType::type() const { return type_; }
|
||||
|
||||
void LogicalType::setPrecision(int32_t precision) {
|
||||
if (type_ != DECIMAL) {
|
||||
throw Exception("Only logical type DECIMAL can have precision");
|
||||
}
|
||||
if (precision <= 0) {
|
||||
throw Exception("Precision cannot be: {}", precision);
|
||||
}
|
||||
precision_ = precision;
|
||||
}
|
||||
|
||||
void LogicalType::setScale(int32_t scale) {
|
||||
if (type_ != DECIMAL) {
|
||||
throw Exception("Only logical type DECIMAL can have scale");
|
||||
}
|
||||
if (scale < 0) {
|
||||
throw Exception("Scale cannot be: {}", scale);
|
||||
}
|
||||
scale_ = scale;
|
||||
}
|
||||
|
||||
void LogicalType::printJson(std::ostream &os) const {
|
||||
switch (type_) {
|
||||
case LogicalType::NONE: break;
|
||||
case LogicalType::DECIMAL:
|
||||
os << R"("logicalType": "decimal")";
|
||||
os << ", \"precision\": " << precision_;
|
||||
os << ", \"scale\": " << scale_;
|
||||
break;
|
||||
case DATE:
|
||||
os << R"("logicalType": "date")";
|
||||
break;
|
||||
case TIME_MILLIS:
|
||||
os << R"("logicalType": "time-millis")";
|
||||
break;
|
||||
case TIME_MICROS:
|
||||
os << R"("logicalType": "time-micros")";
|
||||
break;
|
||||
case TIMESTAMP_MILLIS:
|
||||
os << R"("logicalType": "timestamp-millis")";
|
||||
break;
|
||||
case TIMESTAMP_MICROS:
|
||||
os << R"("logicalType": "timestamp-micros")";
|
||||
break;
|
||||
case DURATION:
|
||||
os << R"("logicalType": "duration")";
|
||||
break;
|
||||
case UUID:
|
||||
os << R"("logicalType": "uuid")";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,226 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "Node.hh"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace avro {
|
||||
|
||||
using std::string;
|
||||
|
||||
Node::~Node() = default;
|
||||
|
||||
/// Alias storage for a Name, allocated only once an alias is added.
struct Name::Aliases {
    /// Aliases exactly as they were passed to addAlias(), in insertion order.
    std::vector<std::string> raw;

    /// The same aliases in the form used for lookups by equalOrAliasedBy();
    /// addAlias() prefixes the owning name's namespace to dot-free aliases.
    std::unordered_set<std::string> fullyQualified;
};
|
||||
|
||||
Name::Name() = default;
|
||||
|
||||
/// Builds a Name from a (possibly dotted) full name; the split and the
/// validation are done by fullname(const string&).
Name::Name(const std::string& name) {
    this->fullname(name);
}
|
||||
|
||||
/// Builds a Name from an already-split simple name and namespace, then
/// validates both with check().
Name::Name(std::string simpleName, std::string ns)
    : ns_(std::move(ns)),
      simpleName_(std::move(simpleName)) {
    this->check();
}
|
||||
|
||||
/// Copy constructor, implemented in terms of copy assignment.
Name::Name(const Name& other) {
    operator=(other);
}
|
||||
|
||||
/// Copy assignment: copies namespace, simple name and aliases from \p other.
///
/// The alias set is deep-copied when \p other has one. When \p other has no
/// aliases, any aliases this object previously held are dropped; previously
/// they were kept, leaving the copy with aliases the source did not have.
/// Self-assignment is a no-op.
Name& Name::operator=(const Name& other) {
    if (this != &other) {
        ns_ = other.ns_;
        simpleName_ = other.simpleName_;
        if (other.aliases_) {
            aliases_ = std::make_unique<Aliases>(*other.aliases_);
        } else {
            aliases_.reset();
        }
    }
    return *this;
}
|
||||
|
||||
Name::Name(Name&& other) = default;
|
||||
|
||||
Name& Name::operator=(Name&& other) = default;
|
||||
|
||||
Name::~Name() = default;
|
||||
|
||||
/// Returns "<namespace>.<simpleName>", or just the simple name when the
/// namespace is empty.
string Name::fullname() const {
    if (ns_.empty()) {
        return simpleName_;
    }
    return ns_ + "." + simpleName_;
}
|
||||
|
||||
void Name::fullname(const string& name) {
|
||||
string::size_type n = name.find_last_of('.');
|
||||
if (n == string::npos) {
|
||||
simpleName_ = name;
|
||||
ns_.clear();
|
||||
} else {
|
||||
ns_ = name.substr(0, n);
|
||||
simpleName_ = name.substr(n + 1);
|
||||
}
|
||||
check();
|
||||
}
|
||||
|
||||
const std::vector<std::string>& Name::aliases() const {
|
||||
static const std::vector<std::string> emptyAliases;
|
||||
return aliases_ ? aliases_->raw : emptyAliases;
|
||||
}
|
||||
|
||||
void Name::addAlias(const std::string& alias) {
|
||||
if (!aliases_) {
|
||||
aliases_ = std::make_unique<Aliases>();
|
||||
}
|
||||
aliases_->raw.push_back(alias);
|
||||
if (!ns_.empty() && alias.find_last_of('.') == string::npos) {
|
||||
aliases_->fullyQualified.emplace(ns_ + "." + alias);
|
||||
} else {
|
||||
aliases_->fullyQualified.insert(alias);
|
||||
}
|
||||
}
|
||||
|
||||
/// Lexicographic ordering on (namespace, simple name). Aliases are ignored.
bool Name::operator<(const Name& n) const {
    if (ns_ < n.ns_) {
        return true;
    }
    if (n.ns_ < ns_) {
        return false;
    }
    return simpleName_ < n.simpleName_;
}
|
||||
|
||||
/// Returns true when \p c is not allowed in a namespace: anything other than
/// an alphanumeric character, '_', '.' or '$'.
static bool invalidChar1(char c) {
    // isalnum() requires a value representable as unsigned char (or EOF);
    // passing a negative plain char is undefined behaviour.
    return !isalnum(static_cast<unsigned char>(c)) && c != '_' && c != '.' && c != '$';
}
|
||||
|
||||
/// Returns true when \p c is not allowed in a simple name: anything other
/// than an alphanumeric character or '_'.
static bool invalidChar2(char c) {
    // isalnum() requires a value representable as unsigned char (or EOF);
    // passing a negative plain char is undefined behaviour.
    return !isalnum(static_cast<unsigned char>(c)) && c != '_';
}
|
||||
|
||||
void Name::check() const {
|
||||
if (!ns_.empty() &&
|
||||
(ns_[0] == '.' || ns_[ns_.size() - 1] == '.' ||
|
||||
std::find_if(ns_.begin(), ns_.end(), invalidChar1) != ns_.end())) {
|
||||
throw Exception("Invalid namespace: " + ns_);
|
||||
}
|
||||
if (simpleName_.empty() ||
|
||||
std::find_if(simpleName_.begin(), simpleName_.end(), invalidChar2) != simpleName_.end()) {
|
||||
throw Exception("Invalid name: " + simpleName_);
|
||||
}
|
||||
}
|
||||
|
||||
/// Two names are equal when both namespace and simple name match.
/// Aliases are not compared.
bool Name::operator==(const Name& n) const {
    if (ns_ != n.ns_) {
        return false;
    }
    return simpleName_ == n.simpleName_;
}
|
||||
|
||||
bool Name::equalOrAliasedBy(const Name& n) const {
|
||||
return *this == n ||
|
||||
(n.aliases_ &&
|
||||
n.aliases_->fullyQualified.find(fullname()) != n.aliases_->fullyQualified.end());
|
||||
}
|
||||
|
||||
/// Resets this name: drops any aliases and empties the simple name and
/// namespace.
void Name::clear() {
    aliases_.reset();
    simpleName_.clear();
    ns_.clear();
}
|
||||
|
||||
/// Attaches \p logicalType to this node after checking that it is compatible
/// with the node's underlying type; throws Exception when it is not.
///
/// checkLock() is called first. Accepted combinations:
///  - DECIMAL on BYTES or FIXED (on FIXED the precision must fit the size;
///    the scale must not exceed the precision)
///  - DATE and TIME_MILLIS on INT
///  - TIME_MICROS, TIMESTAMP_MILLIS and TIMESTAMP_MICROS on LONG
///  - DURATION on FIXED of size 12
///  - UUID on STRING
void Node::setLogicalType(LogicalType logicalType) {
    checkLock();

    // Each case breaks out when the annotation is valid and throws otherwise.
    switch (logicalType.type()) {
        case LogicalType::NONE:
            break;

        case LogicalType::DECIMAL: {
            const bool onFixed = (type_ == AVRO_FIXED);
            if (!onFixed && type_ != AVRO_BYTES) {
                throw Exception("DECIMAL logical type can annotate only BYTES or FIXED type");
            }
            if (onFixed) {
                // Max precision that can be supported by the current size of
                // the FIXED type.
                auto maxPrecision = static_cast<int32_t>(
                    floor(log10(2.0) * (8.0 * static_cast<double>(fixedSize()) - 1)));
                if (logicalType.precision() > maxPrecision) {
                    throw Exception(
                        "DECIMAL precision {} is too large for the FIXED type of size {}, precision cannot be larger than {}",
                        logicalType.precision(),
                        fixedSize(),
                        maxPrecision);
                }
            }
            if (logicalType.scale() > logicalType.precision()) {
                throw Exception("DECIMAL scale cannot exceed precision");
            }
            break;
        }

        case LogicalType::DATE:
            if (type_ == AVRO_INT) {
                break;
            }
            throw Exception("DATE logical type can only annotate INT type");

        case LogicalType::TIME_MILLIS:
            if (type_ == AVRO_INT) {
                break;
            }
            throw Exception("TIME-MILLIS logical type can only annotate INT type");

        case LogicalType::TIME_MICROS:
            if (type_ == AVRO_LONG) {
                break;
            }
            throw Exception("TIME-MICROS logical type can only annotate LONG type");

        case LogicalType::TIMESTAMP_MILLIS:
            if (type_ == AVRO_LONG) {
                break;
            }
            throw Exception("TIMESTAMP-MILLIS logical type can only annotate LONG type");

        case LogicalType::TIMESTAMP_MICROS:
            if (type_ == AVRO_LONG) {
                break;
            }
            throw Exception("TIMESTAMP-MICROS logical type can only annotate LONG type");

        case LogicalType::DURATION:
            if (type_ == AVRO_FIXED && fixedSize() == 12) {
                break;
            }
            throw Exception("DURATION logical type can only annotate FIXED type of size 12");

        case LogicalType::UUID:
            if (type_ == AVRO_STRING) {
                break;
            }
            throw Exception("UUID logical type can only annotate STRING type");
    }

    logicalType_ = logicalType;
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,613 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "NodeImpl.hh"
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
using std::string;
|
||||
namespace avro {
|
||||
|
||||
namespace {
|
||||
|
||||
// Escape string for serialization.
|
||||
string escape(const string &unescaped) {
|
||||
string s;
|
||||
s.reserve(unescaped.length());
|
||||
for (char c : unescaped) {
|
||||
switch (c) {
|
||||
case '\\':
|
||||
case '"':
|
||||
case '/':
|
||||
s += '\\';
|
||||
s += c;
|
||||
break;
|
||||
case '\b':
|
||||
s += '\\';
|
||||
s += 'b';
|
||||
break;
|
||||
case '\f':
|
||||
s += '\f';
|
||||
break;
|
||||
case '\n':
|
||||
s += '\\';
|
||||
s += 'n';
|
||||
break;
|
||||
case '\r':
|
||||
s += '\\';
|
||||
s += 'r';
|
||||
break;
|
||||
case '\t':
|
||||
s += '\\';
|
||||
s += 't';
|
||||
break;
|
||||
default:
|
||||
if (!std::iscntrl(c, std::locale::classic())) {
|
||||
s += c;
|
||||
continue;
|
||||
}
|
||||
s += intToHex(static_cast<unsigned int>(c));
|
||||
break;
|
||||
}
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
// Tag type carrying an indentation depth, so that `os << indent(n)` can be
// given its own ostream operator<<.
struct indent {
    size_t d; // number of indentation units to emit

    explicit indent(size_t depth) : d(depth) {}
};
|
||||
|
||||
/// ostream operator for indent
|
||||
std::ostream &operator<<(std::ostream &os, indent x) {
|
||||
static const string spaces(" ");
|
||||
while (x.d--) {
|
||||
os << spaces;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
void printCustomAttributes(const CustomAttributes &customAttributes, size_t depth,
|
||||
std::ostream &os) {
|
||||
std::map<std::string, std::string>::const_iterator iter =
|
||||
customAttributes.attributes().begin();
|
||||
while (iter != customAttributes.attributes().end()) {
|
||||
os << ",\n"
|
||||
<< indent(depth);
|
||||
customAttributes.printJson(os, iter->first);
|
||||
++iter;
|
||||
}
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
const int kByteStringSize = 6;
|
||||
|
||||
/// Resolves this (writer) primitive against \p reader.
///
/// Identical types match. Otherwise the numeric promotions are tried in
/// order: INT -> LONG, INT/LONG -> FLOAT, INT/LONG/FLOAT -> DOUBLE. Anything
/// else is delegated to furtherResolution().
SchemaResolution
NodePrimitive::resolve(const Node &reader) const {
    const auto writerType = type();
    const auto readerType = reader.type();
    if (writerType == readerType) {
        return RESOLVE_MATCH;
    }

    const bool fromInt = (writerType == AVRO_INT);
    const bool fromIntOrLong = fromInt || (writerType == AVRO_LONG);
    const bool fromIntLongOrFloat = fromIntOrLong || (writerType == AVRO_FLOAT);

    if (fromInt && readerType == AVRO_LONG) {
        return RESOLVE_PROMOTABLE_TO_LONG;
    }
    if (fromIntOrLong && readerType == AVRO_FLOAT) {
        return RESOLVE_PROMOTABLE_TO_FLOAT;
    }
    if (fromIntLongOrFloat && readerType == AVRO_DOUBLE) {
        return RESOLVE_PROMOTABLE_TO_DOUBLE;
    }

    return furtherResolution(reader);
}
|
||||
|
||||
/// A record matches a reader record with the same name; every other case is
/// delegated to furtherResolution().
SchemaResolution
NodeRecord::resolve(const Node &reader) const {
    const bool sameRecord = reader.type() == AVRO_RECORD && name() == reader.name();
    if (sameRecord) {
        return RESOLVE_MATCH;
    }
    return furtherResolution(reader);
}
|
||||
|
||||
/// Against a reader enum the result is decided by name equality alone;
/// non-enum readers are delegated to furtherResolution().
SchemaResolution
NodeEnum::resolve(const Node &reader) const {
    if (reader.type() != AVRO_ENUM) {
        return furtherResolution(reader);
    }
    if (name() == reader.name()) {
        return RESOLVE_MATCH;
    }
    return RESOLVE_NO_MATCH;
}
|
||||
|
||||
/// Against a reader array the result is the resolution of the two item
/// schemas (leaf 0); other readers are delegated to furtherResolution().
SchemaResolution
NodeArray::resolve(const Node &reader) const {
    if (reader.type() != AVRO_ARRAY) {
        return furtherResolution(reader);
    }
    const NodePtr &writerItems = leafAt(0);
    return writerItems->resolve(*reader.leafAt(0));
}
|
||||
|
||||
/// Against a reader map the result is the resolution of the two value
/// schemas (leaf 1); other readers are delegated to furtherResolution().
SchemaResolution
NodeMap::resolve(const Node &reader) const {
    if (reader.type() != AVRO_MAP) {
        return furtherResolution(reader);
    }
    const NodePtr &writerValues = leafAt(1);
    return writerValues->resolve(*reader.leafAt(1));
}
|
||||
|
||||
/// Resolves a writer union against \p reader.
///
/// If the writer is a union, resolution only needs to occur once the selected
/// writer branch is known, so this function is of limited use. It reports
/// whether any branch could match: RESOLVE_MATCH as soon as one branch matches
/// exactly, otherwise the first branch result that is not RESOLVE_NO_MATCH,
/// otherwise RESOLVE_NO_MATCH.
SchemaResolution
NodeUnion::resolve(const Node &reader) const {
    SchemaResolution best = RESOLVE_NO_MATCH;
    const size_t branchCount = leaves();
    for (size_t idx = 0; idx != branchCount; ++idx) {
        const SchemaResolution candidate = leafAt(idx)->resolve(reader);
        if (candidate == RESOLVE_MATCH) {
            return RESOLVE_MATCH;
        }
        if (best == RESOLVE_NO_MATCH) {
            best = candidate;
        }
    }
    return best;
}
|
||||
|
||||
/// Against a reader fixed type the result is a match only when both size and
/// name are equal; other readers are delegated to furtherResolution().
SchemaResolution
NodeFixed::resolve(const Node &reader) const {
    if (reader.type() != AVRO_FIXED) {
        return furtherResolution(reader);
    }
    const bool identical = (reader.fixedSize() == fixedSize()) && (reader.name() == name());
    return identical ? RESOLVE_MATCH : RESOLVE_NO_MATCH;
}
|
||||
|
||||
/// Forwards resolution to leaf 0 of this symbolic node.
SchemaResolution
NodeSymbolic::resolve(const Node &reader) const {
    return leafAt(0)->resolve(reader);
}
|
||||
|
||||
void NodePrimitive::printJson(std::ostream &os, size_t depth) const {
|
||||
bool hasLogicalType = logicalType().type() != LogicalType::NONE;
|
||||
|
||||
if (hasLogicalType) {
|
||||
os << "{\n"
|
||||
<< indent(depth) << "\"type\": ";
|
||||
}
|
||||
|
||||
os << '\"' << type() << '\"';
|
||||
|
||||
if (hasLogicalType) {
|
||||
os << ",\n"
|
||||
<< indent(depth);
|
||||
logicalType().printJson(os);
|
||||
os << "\n}";
|
||||
}
|
||||
if (!getDoc().empty()) {
|
||||
os << ",\n"
|
||||
<< indent(depth) << R"("doc": ")"
|
||||
<< escape(getDoc()) << "\"";
|
||||
}
|
||||
}
|
||||
|
||||
void NodeSymbolic::printJson(std::ostream &os, size_t depth) const {
|
||||
os << '\"' << nameAttribute_.get() << '\"';
|
||||
if (!getDoc().empty()) {
|
||||
os << ",\n"
|
||||
<< indent(depth) << R"("doc": ")"
|
||||
<< escape(getDoc()) << "\"";
|
||||
}
|
||||
}
|
||||
|
||||
static void printName(std::ostream &os, const Name &n, size_t depth) {
|
||||
if (!n.ns().empty()) {
|
||||
os << indent(depth) << R"("namespace": ")" << n.ns() << "\",\n";
|
||||
}
|
||||
os << indent(depth) << R"("name": ")" << n.simpleName() << "\",\n";
|
||||
}
|
||||
|
||||
void NodeRecord::printJson(std::ostream &os, size_t depth) const {
|
||||
os << "{\n";
|
||||
os << indent(++depth) << "\"type\": \"record\",\n";
|
||||
const Name &name = nameAttribute_.get();
|
||||
printName(os, name, depth);
|
||||
|
||||
const auto &aliases = name.aliases();
|
||||
if (!aliases.empty()) {
|
||||
os << indent(depth) << "\"aliases\": [";
|
||||
++depth;
|
||||
for (size_t i = 0; i < aliases.size(); ++i) {
|
||||
if (i > 0) {
|
||||
os << ',';
|
||||
}
|
||||
os << '\n'
|
||||
<< indent(depth) << "\"" << aliases[i] << "\"";
|
||||
}
|
||||
os << '\n'
|
||||
<< indent(--depth) << "]\n";
|
||||
}
|
||||
|
||||
if (!getDoc().empty()) {
|
||||
os << indent(depth) << R"("doc": ")"
|
||||
<< escape(getDoc()) << "\",\n";
|
||||
}
|
||||
|
||||
os << indent(depth) << "\"fields\": [";
|
||||
size_t fields = leafAttributes_.size();
|
||||
++depth;
|
||||
assert(fieldsAliases_.empty() || (fieldsAliases_.size() == fields));
|
||||
assert(fieldsDefaultValues_.empty() || (fieldsDefaultValues_.size() == fields));
|
||||
assert(customAttributes_.size() == 0 || customAttributes_.size() == fields);
|
||||
for (size_t i = 0; i < fields; ++i) {
|
||||
if (i > 0) {
|
||||
os << ',';
|
||||
}
|
||||
os << '\n'
|
||||
<< indent(depth) << "{\n";
|
||||
os << indent(++depth) << R"("name": ")" << leafNameAttributes_.get(i) << "\",\n";
|
||||
os << indent(depth) << "\"type\": ";
|
||||
leafAttributes_.get(i)->printJson(os, depth);
|
||||
|
||||
if (!fieldsAliases_.empty() && !fieldsAliases_[i].empty()) {
|
||||
os << ",\n"
|
||||
<< indent(depth) << "\"aliases\": [";
|
||||
++depth;
|
||||
for (size_t j = 0; j < fieldsAliases_[i].size(); ++j) {
|
||||
if (j > 0) {
|
||||
os << ',';
|
||||
}
|
||||
os << '\n'
|
||||
<< indent(depth) << "\"" << fieldsAliases_[i][j] << "\"";
|
||||
}
|
||||
os << '\n'
|
||||
<< indent(--depth) << ']';
|
||||
}
|
||||
|
||||
// Serialize "default" field:
|
||||
if (!fieldsDefaultValues_.empty()) {
|
||||
if (!fieldsDefaultValues_[i].isUnion() && fieldsDefaultValues_[i].type() == AVRO_NULL) {
|
||||
// No "default" field.
|
||||
} else {
|
||||
os << ",\n"
|
||||
<< indent(depth) << "\"default\": ";
|
||||
leafAttributes_.get(i)->printDefaultToJson(fieldsDefaultValues_[i], os,
|
||||
depth);
|
||||
}
|
||||
}
|
||||
|
||||
if (customAttributes_.size() == fields) {
|
||||
printCustomAttributes(customAttributes_.get(i), depth, os);
|
||||
}
|
||||
|
||||
os << '\n';
|
||||
os << indent(--depth) << '}';
|
||||
}
|
||||
os << '\n'
|
||||
<< indent(--depth) << "]\n";
|
||||
os << indent(--depth) << '}';
|
||||
}
|
||||
|
||||
void NodePrimitive::printDefaultToJson(const GenericDatum &g, std::ostream &os,
|
||||
size_t) const {
|
||||
assert(isPrimitive(g.type()));
|
||||
|
||||
switch (g.type()) {
|
||||
case AVRO_NULL:
|
||||
os << "null";
|
||||
break;
|
||||
case AVRO_BOOL:
|
||||
os << (g.value<bool>() ? "true" : "false");
|
||||
break;
|
||||
case AVRO_INT:
|
||||
os << g.value<int32_t>();
|
||||
break;
|
||||
case AVRO_LONG:
|
||||
os << g.value<int64_t>();
|
||||
break;
|
||||
case AVRO_FLOAT:
|
||||
os << g.value<float>();
|
||||
break;
|
||||
case AVRO_DOUBLE:
|
||||
os << g.value<double>();
|
||||
break;
|
||||
case AVRO_STRING:
|
||||
os << "\"" << escape(g.value<string>()) << "\"";
|
||||
break;
|
||||
case AVRO_BYTES: {
|
||||
// Convert to a string:
|
||||
const auto &vg = g.value<std::vector<uint8_t>>();
|
||||
string s;
|
||||
s.resize(vg.size() * kByteStringSize);
|
||||
for (unsigned int i = 0; i < vg.size(); i++) {
|
||||
string hex_string = intToHex(static_cast<int>(vg[i]));
|
||||
s.replace(i * kByteStringSize, kByteStringSize, hex_string);
|
||||
}
|
||||
os << "\"" << s << "\"";
|
||||
} break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
void NodeEnum::printDefaultToJson(const GenericDatum &g, std::ostream &os,
|
||||
size_t) const {
|
||||
assert(g.type() == AVRO_ENUM);
|
||||
os << "\"" << g.value<GenericEnum>().symbol() << "\"";
|
||||
}
|
||||
|
||||
void NodeFixed::printDefaultToJson(const GenericDatum &g, std::ostream &os,
|
||||
size_t) const {
|
||||
assert(g.type() == AVRO_FIXED);
|
||||
// ex: "\uOOff"
|
||||
// Convert to a string
|
||||
const std::vector<uint8_t> &vg = g.value<GenericFixed>().value();
|
||||
string s;
|
||||
s.resize(vg.size() * kByteStringSize);
|
||||
for (unsigned int i = 0; i < vg.size(); i++) {
|
||||
string hex_string = intToHex(static_cast<int>(vg[i]));
|
||||
s.replace(i * kByteStringSize, kByteStringSize, hex_string);
|
||||
}
|
||||
os << "\"" << s << "\"";
|
||||
}
|
||||
|
||||
void NodeUnion::printDefaultToJson(const GenericDatum &g, std::ostream &os,
|
||||
size_t depth) const {
|
||||
leafAt(0)->printDefaultToJson(g, os, depth);
|
||||
}
|
||||
|
||||
void NodeArray::printDefaultToJson(const GenericDatum &g, std::ostream &os,
|
||||
size_t depth) const {
|
||||
assert(g.type() == AVRO_ARRAY);
|
||||
// ex: "default": [1]
|
||||
if (g.value<GenericArray>().value().empty()) {
|
||||
os << "[]";
|
||||
} else {
|
||||
os << "[\n";
|
||||
depth++;
|
||||
|
||||
// Serialize all values of the array with recursive calls:
|
||||
for (unsigned int i = 0; i < g.value<GenericArray>().value().size(); i++) {
|
||||
if (i > 0) {
|
||||
os << ",\n";
|
||||
}
|
||||
os << indent(depth);
|
||||
leafAt(0)->printDefaultToJson(g.value<GenericArray>().value()[i], os,
|
||||
depth);
|
||||
}
|
||||
os << "\n"
|
||||
<< indent(--depth) << "]";
|
||||
}
|
||||
}
|
||||
|
||||
void NodeSymbolic::printDefaultToJson(const GenericDatum &g, std::ostream &os,
|
||||
size_t depth) const {
|
||||
getNode()->printDefaultToJson(g, os, depth);
|
||||
}
|
||||
|
||||
void NodeRecord::printDefaultToJson(const GenericDatum &g, std::ostream &os,
|
||||
size_t depth) const {
|
||||
assert(g.type() == AVRO_RECORD);
|
||||
if (g.value<GenericRecord>().fieldCount() == 0) {
|
||||
os << "{}";
|
||||
} else {
|
||||
os << "{\n";
|
||||
|
||||
// Serialize all fields of the record with recursive calls:
|
||||
for (size_t i = 0; i < g.value<GenericRecord>().fieldCount(); i++) {
|
||||
if (i == 0) {
|
||||
++depth;
|
||||
} else { // i > 0
|
||||
os << ",\n";
|
||||
}
|
||||
|
||||
os << indent(depth) << "\"";
|
||||
assert(i < leaves());
|
||||
os << leafNameAttributes_.get(i);
|
||||
os << "\": ";
|
||||
|
||||
// Recursive call on child node to be able to get the name attribute
|
||||
// (In case of a record we need the name of the leaves (contained in
|
||||
// 'this'))
|
||||
leafAt(i)->printDefaultToJson(g.value<GenericRecord>().fieldAt(i), os,
|
||||
depth);
|
||||
}
|
||||
os << "\n"
|
||||
<< indent(--depth) << "}";
|
||||
}
|
||||
}
|
||||
|
||||
/// Record without doc, field aliases or custom attributes; delegates to the
/// full constructor.
NodeRecord::NodeRecord(const HasName &name,
                       const MultiLeaves &fields,
                       const LeafNames &fieldsNames,
                       std::vector<GenericDatum> dv)
    : NodeRecord(name, HasDoc(), fields, fieldsNames, {}, std::move(dv), MultiAttributes()) {
}
|
||||
|
||||
/// Record with doc but without field aliases or custom attributes; delegates
/// to the full constructor.
NodeRecord::NodeRecord(const HasName &name,
                       const HasDoc &doc,
                       const MultiLeaves &fields,
                       const LeafNames &fieldsNames,
                       std::vector<GenericDatum> dv)
    : NodeRecord(name, doc, fields, fieldsNames, {}, std::move(dv), MultiAttributes()) {
}
|
||||
|
||||
/// Record with field aliases and custom attributes but no doc; delegates to
/// the full constructor.
NodeRecord::NodeRecord(const HasName &name,
                       const MultiLeaves &fields,
                       const LeafNames &fieldsNames,
                       std::vector<std::vector<std::string>> fieldsAliases,
                       std::vector<GenericDatum> dv,
                       const MultiAttributes &customAttributes)
    : NodeRecord(name, HasDoc(), fields, fieldsNames, std::move(fieldsAliases), std::move(dv), customAttributes) {
}
|
||||
|
||||
/// Full constructor: stores the field aliases and default values, then
/// registers every field name and every field alias in nameIndex_ under the
/// field's position.
///
/// Throws Exception when a field name or alias is already registered.
NodeRecord::NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields,
                       const LeafNames &fieldsNames, std::vector<std::vector<std::string>> fieldsAliases,
                       std::vector<GenericDatum> dv, const MultiAttributes &customAttributes)
    : NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames, customAttributes, NoSize()),
      fieldsAliases_(std::move(fieldsAliases)),
      fieldsDefaultValues_(std::move(dv)) {

    const size_t fieldTotal = leafNameAttributes_.size();
    for (size_t i = 0; i < fieldTotal; ++i) {
        if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
            throw Exception("Cannot add duplicate field: {}", leafNameAttributes_.get(i));
        }

        // Only touch alias lists that actually exist. The old check
        // (!fieldsAliases_.empty()) indexed past the end when the alias
        // vector was shorter than the field list.
        if (i >= fieldsAliases_.size()) {
            continue;
        }
        for (const auto &alias : fieldsAliases_[i]) {
            if (!nameIndex_.add(alias, i)) {
                throw Exception("Cannot add duplicate field: {}", alias);
            }
        }
    }
}
|
||||
|
||||
void NodeMap::printDefaultToJson(const GenericDatum &g, std::ostream &os,
|
||||
size_t depth) const {
|
||||
assert(g.type() == AVRO_MAP);
|
||||
if (g.value<GenericMap>().value().empty()) {
|
||||
os << "{}";
|
||||
} else {
|
||||
os << "{\n";
|
||||
|
||||
for (size_t i = 0; i < g.value<GenericMap>().value().size(); i++) {
|
||||
if (i == 0) {
|
||||
++depth;
|
||||
} else {
|
||||
os << ",\n";
|
||||
}
|
||||
os << indent(depth) << "\"" << g.value<GenericMap>().value()[i].first
|
||||
<< "\": ";
|
||||
|
||||
leafAt(i)->printDefaultToJson(g.value<GenericMap>().value()[i].second, os,
|
||||
depth);
|
||||
}
|
||||
os << "\n"
|
||||
<< indent(--depth) << "}";
|
||||
}
|
||||
}
|
||||
|
||||
void NodeEnum::printJson(std::ostream &os, size_t depth) const {
|
||||
os << "{\n";
|
||||
os << indent(++depth) << "\"type\": \"enum\",\n";
|
||||
if (!getDoc().empty()) {
|
||||
os << indent(depth) << R"("doc": ")"
|
||||
<< escape(getDoc()) << "\",\n";
|
||||
}
|
||||
printName(os, nameAttribute_.get(), depth);
|
||||
os << indent(depth) << "\"symbols\": [\n";
|
||||
|
||||
auto names = leafNameAttributes_.size();
|
||||
++depth;
|
||||
for (size_t i = 0; i < names; ++i) {
|
||||
if (i > 0) {
|
||||
os << ",\n";
|
||||
}
|
||||
os << indent(depth) << '\"' << leafNameAttributes_.get(i) << '\"';
|
||||
}
|
||||
os << '\n';
|
||||
os << indent(--depth) << "]\n";
|
||||
os << indent(--depth) << '}';
|
||||
}
|
||||
|
||||
void NodeArray::printJson(std::ostream &os, size_t depth) const {
|
||||
os << "{\n";
|
||||
os << indent(depth + 1) << "\"type\": \"array\",\n";
|
||||
if (!getDoc().empty()) {
|
||||
os << indent(depth + 1) << R"("doc": ")"
|
||||
<< escape(getDoc()) << "\",\n";
|
||||
}
|
||||
os << indent(depth + 1) << "\"items\": ";
|
||||
leafAttributes_.get()->printJson(os, depth + 1);
|
||||
os << '\n';
|
||||
os << indent(depth) << '}';
|
||||
}
|
||||
|
||||
void NodeMap::printJson(std::ostream &os, size_t depth) const {
|
||||
os << "{\n";
|
||||
os << indent(depth + 1) << "\"type\": \"map\",\n";
|
||||
if (!getDoc().empty()) {
|
||||
os << indent(depth + 1) << R"("doc": ")"
|
||||
<< escape(getDoc()) << "\",\n";
|
||||
}
|
||||
os << indent(depth + 1) << "\"values\": ";
|
||||
leafAttributes_.get(1)->printJson(os, depth + 1);
|
||||
os << '\n';
|
||||
os << indent(depth) << '}';
|
||||
}
|
||||
|
||||
/// Creates a map node and adds a string primitive as its first leaf (the
/// key schema).
NodeMap::NodeMap() : NodeImplMap(AVRO_MAP) {
    const NodePtr keySchema = std::make_shared<NodePrimitive>(AVRO_STRING);
    doAddLeaf(keySchema);
}
|
||||
|
||||
void NodeUnion::printJson(std::ostream &os, size_t depth) const {
|
||||
os << "[\n";
|
||||
auto fields = leafAttributes_.size();
|
||||
++depth;
|
||||
for (size_t i = 0; i < fields; ++i) {
|
||||
if (i > 0) {
|
||||
os << ",\n";
|
||||
}
|
||||
os << indent(depth);
|
||||
leafAttributes_.get(i)->printJson(os, depth);
|
||||
}
|
||||
os << '\n';
|
||||
os << indent(--depth) << ']';
|
||||
}
|
||||
|
||||
void NodeFixed::printJson(std::ostream &os, size_t depth) const {
|
||||
os << "{\n";
|
||||
os << indent(++depth) << "\"type\": \"fixed\",\n";
|
||||
if (!getDoc().empty()) {
|
||||
os << indent(depth) << R"("doc": ")"
|
||||
<< escape(getDoc()) << "\",\n";
|
||||
}
|
||||
printName(os, nameAttribute_.get(), depth);
|
||||
os << indent(depth) << "\"size\": " << sizeAttribute_.get();
|
||||
|
||||
if (logicalType().type() != LogicalType::NONE) {
|
||||
os << ",\n"
|
||||
<< indent(depth);
|
||||
logicalType().printJson(os);
|
||||
}
|
||||
|
||||
os << "\n"
|
||||
<< indent(--depth) << '}';
|
||||
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,744 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "Resolver.hh"
|
||||
#include "AvroTraits.hh"
|
||||
#include "Layout.hh"
|
||||
#include "NodeImpl.hh"
|
||||
#include "Reader.hh"
|
||||
#include "ValidSchema.hh"
|
||||
#include <memory>
|
||||
|
||||
namespace avro {
|
||||
using std::unique_ptr;
|
||||
|
||||
class ResolverFactory;
|
||||
typedef std::shared_ptr<Resolver> ResolverPtr;
|
||||
typedef std::vector<std::unique_ptr<Resolver>> ResolverPtrVector;
|
||||
|
||||
// #define DEBUG_VERBOSE
|
||||
|
||||
#ifdef DEBUG_VERBOSE
// Tracing enabled: DEBUG_OUT streams its argument to stdout, one line each.
#define DEBUG_OUT(str) std::cout << str << '\n'
#else
// Tracing disabled: DEBUG_OUT streams into a sink whose operator<< discards
// every operand, so the trace expressions still type-check.
struct NoOp {};

template<typename T>
NoOp &operator<<(NoOp &sink, const T &) {
    return sink;
}

NoOp noop;
#define DEBUG_OUT(str) noop << str
#endif
|
||||
|
||||
template<typename T>
|
||||
class PrimitiveSkipper : public Resolver {
|
||||
public:
|
||||
PrimitiveSkipper() : Resolver() {}
|
||||
|
||||
void parse(Reader &reader, uint8_t *) const final {
|
||||
T val;
|
||||
reader.readValue(val);
|
||||
DEBUG_OUT("Skipping " << val);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
class PrimitiveParser : public Resolver {
|
||||
public:
|
||||
explicit PrimitiveParser(const PrimitiveLayout &offset) : Resolver(),
|
||||
offset_(offset.offset()) {}
|
||||
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
T *location = reinterpret_cast<T *>(address + offset_);
|
||||
reader.readValue(*location);
|
||||
DEBUG_OUT("Reading " << *location);
|
||||
}
|
||||
|
||||
private:
|
||||
size_t offset_;
|
||||
};
|
||||
|
||||
template<typename WT, typename RT>
|
||||
class PrimitivePromoter : public Resolver {
|
||||
public:
|
||||
explicit PrimitivePromoter(const PrimitiveLayout &offset) : Resolver(),
|
||||
offset_(offset.offset()) {}
|
||||
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
parseIt<WT>(reader, address);
|
||||
}
|
||||
|
||||
private:
|
||||
void parseIt(Reader &reader, uint8_t *address, const std::true_type &) const {
|
||||
WT val;
|
||||
reader.readValue(val);
|
||||
RT *location = reinterpret_cast<RT *>(address + offset_);
|
||||
*location = static_cast<RT>(val);
|
||||
DEBUG_OUT("Promoting " << val);
|
||||
}
|
||||
|
||||
void parseIt(Reader &, uint8_t *, const std::false_type &) const {}
|
||||
|
||||
template<typename T>
|
||||
void parseIt(Reader &reader, uint8_t *address) const {
|
||||
parseIt(reader, address, is_promotable<T>());
|
||||
}
|
||||
|
||||
size_t offset_;
|
||||
};
|
||||
|
||||
template<>
|
||||
class PrimitiveSkipper<std::vector<uint8_t>> : public Resolver {
|
||||
public:
|
||||
PrimitiveSkipper() : Resolver() {}
|
||||
|
||||
void parse(Reader &reader, uint8_t *) const final {
|
||||
std::vector<uint8_t> val;
|
||||
reader.readBytes(val);
|
||||
DEBUG_OUT("Skipping bytes");
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
class PrimitiveParser<std::vector<uint8_t>> : public Resolver {
|
||||
public:
|
||||
explicit PrimitiveParser(const PrimitiveLayout &offset) : Resolver(),
|
||||
offset_(offset.offset()) {}
|
||||
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
auto *location = reinterpret_cast<std::vector<uint8_t> *>(address + offset_);
|
||||
reader.readBytes(*location);
|
||||
DEBUG_OUT("Reading bytes");
|
||||
}
|
||||
|
||||
private:
|
||||
size_t offset_;
|
||||
};
|
||||
|
||||
class RecordSkipper : public Resolver {
|
||||
public:
|
||||
RecordSkipper(ResolverFactory &factory, const NodePtr &writer);
|
||||
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
DEBUG_OUT("Skipping record");
|
||||
|
||||
reader.readRecord();
|
||||
size_t steps = resolvers_.size();
|
||||
for (size_t i = 0; i < steps; ++i) {
|
||||
resolvers_[i]->parse(reader, address);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
ResolverPtrVector resolvers_;
|
||||
};
|
||||
|
||||
class RecordParser : public Resolver {
|
||||
public:
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
DEBUG_OUT("Reading record");
|
||||
|
||||
reader.readRecord();
|
||||
size_t steps = resolvers_.size();
|
||||
for (size_t i = 0; i < steps; ++i) {
|
||||
resolvers_[i]->parse(reader, address);
|
||||
}
|
||||
}
|
||||
|
||||
RecordParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets);
|
||||
|
||||
protected:
|
||||
ResolverPtrVector resolvers_;
|
||||
};
|
||||
|
||||
class MapSkipper : public Resolver {
|
||||
public:
|
||||
MapSkipper(ResolverFactory &factory, const NodePtr &writer);
|
||||
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
DEBUG_OUT("Skipping map");
|
||||
|
||||
std::string key;
|
||||
int64_t size;
|
||||
do {
|
||||
size = reader.readMapBlockSize();
|
||||
for (auto i = 0; i < size; ++i) {
|
||||
reader.readValue(key);
|
||||
resolver_->parse(reader, address);
|
||||
}
|
||||
} while (size != 0);
|
||||
}
|
||||
|
||||
protected:
|
||||
ResolverPtr resolver_;
|
||||
};
|
||||
|
||||
class MapParser : public Resolver {
|
||||
public:
|
||||
typedef uint8_t *(*GenericMapSetter)(uint8_t *map, const std::string &key);
|
||||
|
||||
MapParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets);
|
||||
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
DEBUG_OUT("Reading map");
|
||||
|
||||
uint8_t *mapAddress = address + offset_;
|
||||
|
||||
std::string key;
|
||||
auto *setter = reinterpret_cast<GenericMapSetter *>(address + setFuncOffset_);
|
||||
|
||||
int64_t size;
|
||||
do {
|
||||
size = reader.readMapBlockSize();
|
||||
for (auto i = 0; i < size; ++i) {
|
||||
reader.readValue(key);
|
||||
|
||||
// create a new map entry and get the address
|
||||
uint8_t *location = (*setter)(mapAddress, key);
|
||||
resolver_->parse(reader, location);
|
||||
}
|
||||
} while (size != 0);
|
||||
}
|
||||
|
||||
protected:
|
||||
ResolverPtr resolver_;
|
||||
size_t offset_;
|
||||
size_t setFuncOffset_;
|
||||
};
|
||||
|
||||
class ArraySkipper : public Resolver {
|
||||
public:
|
||||
ArraySkipper(ResolverFactory &factory, const NodePtr &writer);
|
||||
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
DEBUG_OUT("Skipping array");
|
||||
|
||||
int64_t size;
|
||||
do {
|
||||
size = reader.readArrayBlockSize();
|
||||
for (auto i = 0; i < size; ++i) {
|
||||
resolver_->parse(reader, address);
|
||||
}
|
||||
} while (size != 0);
|
||||
}
|
||||
|
||||
protected:
|
||||
ResolverPtr resolver_;
|
||||
};
|
||||
|
||||
typedef uint8_t *(*GenericArraySetter)(uint8_t *array);
|
||||
|
||||
class ArrayParser : public Resolver {
|
||||
public:
|
||||
ArrayParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets);
|
||||
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
DEBUG_OUT("Reading array");
|
||||
|
||||
uint8_t *arrayAddress = address + offset_;
|
||||
|
||||
auto *setter = reinterpret_cast<GenericArraySetter *>(address + setFuncOffset_);
|
||||
|
||||
int64_t size;
|
||||
do {
|
||||
size = reader.readArrayBlockSize();
|
||||
for (auto i = 0; i < size; ++i) {
|
||||
// create a new map entry and get the address
|
||||
uint8_t *location = (*setter)(arrayAddress);
|
||||
resolver_->parse(reader, location);
|
||||
}
|
||||
} while (size != 0);
|
||||
}
|
||||
|
||||
protected:
|
||||
ArrayParser() : Resolver(), offset_(0), setFuncOffset_(0) {}
|
||||
|
||||
ResolverPtr resolver_;
|
||||
size_t offset_;
|
||||
size_t setFuncOffset_;
|
||||
};
|
||||
|
||||
class EnumSkipper : public Resolver {
|
||||
public:
|
||||
EnumSkipper(ResolverFactory &, const NodePtr &) : Resolver() {}
|
||||
|
||||
void parse(Reader &reader, uint8_t *) const final {
|
||||
int64_t val = reader.readEnum();
|
||||
DEBUG_OUT("Skipping enum" << val);
|
||||
}
|
||||
};
|
||||
|
||||
class EnumParser : public Resolver {
|
||||
public:
|
||||
enum EnumRepresentation {
|
||||
VAL
|
||||
};
|
||||
|
||||
EnumParser(ResolverFactory &, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets) : Resolver(),
|
||||
offset_(offsets.at(0).offset()),
|
||||
readerSize_(reader->names()) {
|
||||
const size_t writerSize = writer->names();
|
||||
|
||||
mapping_.reserve(writerSize);
|
||||
|
||||
for (size_t i = 0; i < writerSize; ++i) {
|
||||
const std::string &name = writer->nameAt(i);
|
||||
size_t readerIndex = readerSize_;
|
||||
reader->nameIndex(name, readerIndex);
|
||||
mapping_.push_back(readerIndex);
|
||||
}
|
||||
}
|
||||
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
auto val = static_cast<size_t>(reader.readEnum());
|
||||
assert(val < mapping_.size());
|
||||
|
||||
if (mapping_[val] < readerSize_) {
|
||||
auto *location = reinterpret_cast<EnumRepresentation *>(address + offset_);
|
||||
*location = static_cast<EnumRepresentation>(mapping_[val]);
|
||||
DEBUG_OUT("Setting enum" << *location);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
size_t offset_;
|
||||
size_t readerSize_;
|
||||
std::vector<size_t> mapping_;
|
||||
};
|
||||
|
||||
class UnionSkipper : public Resolver {
|
||||
public:
|
||||
UnionSkipper(ResolverFactory &factory, const NodePtr &writer);
|
||||
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
DEBUG_OUT("Skipping union");
|
||||
auto choice = static_cast<size_t>(reader.readUnion());
|
||||
resolvers_[choice]->parse(reader, address);
|
||||
}
|
||||
|
||||
protected:
|
||||
ResolverPtrVector resolvers_;
|
||||
};
|
||||
|
||||
class UnionParser : public Resolver {
|
||||
public:
|
||||
typedef uint8_t *(*GenericUnionSetter)(uint8_t *, int64_t);
|
||||
|
||||
UnionParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets);
|
||||
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
DEBUG_OUT("Reading union");
|
||||
auto writerChoice = static_cast<size_t>(reader.readUnion());
|
||||
auto *readerChoice = reinterpret_cast<int64_t *>(address + choiceOffset_);
|
||||
|
||||
*readerChoice = choiceMapping_[writerChoice];
|
||||
auto *setter = reinterpret_cast<GenericUnionSetter *>(address + setFuncOffset_);
|
||||
uint8_t *value = address + offset_;
|
||||
uint8_t *location = (*setter)(value, *readerChoice);
|
||||
|
||||
resolvers_[writerChoice]->parse(reader, location);
|
||||
}
|
||||
|
||||
protected:
|
||||
ResolverPtrVector resolvers_;
|
||||
std::vector<int64_t> choiceMapping_;
|
||||
size_t offset_;
|
||||
size_t choiceOffset_;
|
||||
size_t setFuncOffset_;
|
||||
};
|
||||
|
||||
class UnionToNonUnionParser : public Resolver {
|
||||
public:
|
||||
typedef uint8_t *(*GenericUnionSetter)(uint8_t *, int64_t);
|
||||
|
||||
UnionToNonUnionParser(ResolverFactory &factory,
|
||||
const NodePtr &writer,
|
||||
const NodePtr &reader,
|
||||
const Layout &offsets);
|
||||
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
DEBUG_OUT("Reading union to non-union");
|
||||
auto choice = static_cast<size_t>(reader.readUnion());
|
||||
resolvers_[choice]->parse(reader, address);
|
||||
}
|
||||
|
||||
protected:
|
||||
ResolverPtrVector resolvers_;
|
||||
};
|
||||
|
||||
class NonUnionToUnionParser : public Resolver {
|
||||
public:
|
||||
typedef uint8_t *(*GenericUnionSetter)(uint8_t *, int64_t);
|
||||
|
||||
NonUnionToUnionParser(ResolverFactory &factory,
|
||||
const NodePtr &writer,
|
||||
const NodePtr &reader,
|
||||
const CompoundLayout &offsets);
|
||||
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
DEBUG_OUT("Reading non-union to union");
|
||||
|
||||
auto *choice = reinterpret_cast<int64_t *>(address + choiceOffset_);
|
||||
*choice = choice_;
|
||||
auto *setter = reinterpret_cast<GenericUnionSetter *>(address + setFuncOffset_);
|
||||
uint8_t *value = address + offset_;
|
||||
uint8_t *location = (*setter)(value, choice_);
|
||||
|
||||
resolver_->parse(reader, location);
|
||||
}
|
||||
|
||||
protected:
|
||||
ResolverPtr resolver_;
|
||||
size_t choice_;
|
||||
size_t offset_;
|
||||
size_t choiceOffset_;
|
||||
size_t setFuncOffset_;
|
||||
};
|
||||
|
||||
class FixedSkipper : public Resolver {
|
||||
public:
|
||||
FixedSkipper(ResolverFactory &, const NodePtr &writer) : Resolver() {
|
||||
size_ = writer->fixedSize();
|
||||
}
|
||||
|
||||
void parse(Reader &reader, uint8_t *) const final {
|
||||
DEBUG_OUT("Skipping fixed");
|
||||
std::unique_ptr<uint8_t[]> val(new uint8_t[size_]);
|
||||
reader.readFixed(&val[0], size_);
|
||||
}
|
||||
|
||||
protected:
|
||||
size_t size_;
|
||||
};
|
||||
|
||||
class FixedParser : public Resolver {
|
||||
public:
|
||||
FixedParser(ResolverFactory &, const NodePtr &writer, const NodePtr &, const CompoundLayout &offsets) : Resolver() {
|
||||
size_ = writer->fixedSize();
|
||||
offset_ = offsets.at(0).offset();
|
||||
}
|
||||
|
||||
void parse(Reader &reader, uint8_t *address) const final {
|
||||
DEBUG_OUT("Reading fixed");
|
||||
uint8_t *location = address + offset_;
|
||||
reader.readFixed(location, size_);
|
||||
}
|
||||
|
||||
protected:
|
||||
size_t size_;
|
||||
size_t offset_;
|
||||
};
|
||||
|
||||
class ResolverFactory : private boost::noncopyable {
|
||||
|
||||
template<typename T>
|
||||
unique_ptr<Resolver>
|
||||
constructPrimitiveSkipper(const NodePtr &) {
|
||||
return unique_ptr<Resolver>(new PrimitiveSkipper<T>());
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
unique_ptr<Resolver>
|
||||
constructPrimitive(const NodePtr &writer, const NodePtr &reader, const Layout &offset) {
|
||||
unique_ptr<Resolver> instruction;
|
||||
|
||||
SchemaResolution match = writer->resolve(*reader);
|
||||
|
||||
if (match == RESOLVE_NO_MATCH) {
|
||||
instruction = unique_ptr<Resolver>(new PrimitiveSkipper<T>());
|
||||
} else if (reader->type() == AVRO_UNION) {
|
||||
const auto &compoundLayout = static_cast<const CompoundLayout &>(offset);
|
||||
instruction = unique_ptr<Resolver>(new NonUnionToUnionParser(*this, writer, reader, compoundLayout));
|
||||
} else if (match == RESOLVE_MATCH) {
|
||||
const auto &primitiveLayout = static_cast<const PrimitiveLayout &>(offset);
|
||||
instruction = unique_ptr<Resolver>(new PrimitiveParser<T>(primitiveLayout));
|
||||
} else if (match == RESOLVE_PROMOTABLE_TO_LONG) {
|
||||
const auto &primitiveLayout = static_cast<const PrimitiveLayout &>(offset);
|
||||
instruction = unique_ptr<Resolver>(new PrimitivePromoter<T, int64_t>(primitiveLayout));
|
||||
} else if (match == RESOLVE_PROMOTABLE_TO_FLOAT) {
|
||||
const auto &primitiveLayout = static_cast<const PrimitiveLayout &>(offset);
|
||||
instruction = unique_ptr<Resolver>(new PrimitivePromoter<T, float>(primitiveLayout));
|
||||
} else if (match == RESOLVE_PROMOTABLE_TO_DOUBLE) {
|
||||
const auto &primitiveLayout = static_cast<const PrimitiveLayout &>(offset);
|
||||
instruction = unique_ptr<Resolver>(new PrimitivePromoter<T, double>(primitiveLayout));
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
return instruction;
|
||||
}
|
||||
|
||||
template<typename Skipper>
|
||||
unique_ptr<Resolver>
|
||||
constructCompoundSkipper(const NodePtr &writer) {
|
||||
return unique_ptr<Resolver>(new Skipper(*this, writer));
|
||||
}
|
||||
|
||||
template<typename Parser, typename Skipper>
|
||||
unique_ptr<Resolver>
|
||||
constructCompound(const NodePtr &writer, const NodePtr &reader, const Layout &offset) {
|
||||
unique_ptr<Resolver> instruction;
|
||||
|
||||
avro::SchemaResolution match = writer->resolve(*reader);
|
||||
|
||||
if (match == RESOLVE_NO_MATCH) {
|
||||
instruction = unique_ptr<Resolver>(new Skipper(*this, writer));
|
||||
} else if (writer->type() != AVRO_UNION && reader->type() == AVRO_UNION) {
|
||||
const auto &compoundLayout = dynamic_cast<const CompoundLayout &>(offset);
|
||||
instruction = unique_ptr<Resolver>(new NonUnionToUnionParser(*this, writer, reader, compoundLayout));
|
||||
} else if (writer->type() == AVRO_UNION && reader->type() != AVRO_UNION) {
|
||||
instruction = unique_ptr<Resolver>(new UnionToNonUnionParser(*this, writer, reader, offset));
|
||||
} else {
|
||||
const auto &compoundLayout = dynamic_cast<const CompoundLayout &>(offset);
|
||||
instruction = unique_ptr<Resolver>(new Parser(*this, writer, reader, compoundLayout));
|
||||
}
|
||||
|
||||
return instruction;
|
||||
}
|
||||
|
||||
public:
|
||||
unique_ptr<Resolver>
|
||||
construct(const NodePtr &writer, const NodePtr &reader, const Layout &offset) {
|
||||
|
||||
typedef unique_ptr<Resolver> (ResolverFactory::*BuilderFunc)(const NodePtr &writer, const NodePtr &reader, const Layout &offset);
|
||||
|
||||
NodePtr currentWriter = (writer->type() == AVRO_SYMBOLIC) ? resolveSymbol(writer) : writer;
|
||||
|
||||
NodePtr currentReader = (reader->type() == AVRO_SYMBOLIC) ? resolveSymbol(reader) : reader;
|
||||
|
||||
static const BuilderFunc funcs[] = {
|
||||
&ResolverFactory::constructPrimitive<std::string>,
|
||||
&ResolverFactory::constructPrimitive<std::vector<uint8_t>>,
|
||||
&ResolverFactory::constructPrimitive<int32_t>,
|
||||
&ResolverFactory::constructPrimitive<int64_t>,
|
||||
&ResolverFactory::constructPrimitive<float>,
|
||||
&ResolverFactory::constructPrimitive<double>,
|
||||
&ResolverFactory::constructPrimitive<bool>,
|
||||
&ResolverFactory::constructPrimitive<Null>,
|
||||
&ResolverFactory::constructCompound<RecordParser, RecordSkipper>,
|
||||
&ResolverFactory::constructCompound<EnumParser, EnumSkipper>,
|
||||
&ResolverFactory::constructCompound<ArrayParser, ArraySkipper>,
|
||||
&ResolverFactory::constructCompound<MapParser, MapSkipper>,
|
||||
&ResolverFactory::constructCompound<UnionParser, UnionSkipper>,
|
||||
&ResolverFactory::constructCompound<FixedParser, FixedSkipper>};
|
||||
|
||||
static_assert((sizeof(funcs) / sizeof(BuilderFunc)) == (AVRO_NUM_TYPES),
|
||||
"Invalid number of builder functions");
|
||||
|
||||
BuilderFunc func = funcs[currentWriter->type()];
|
||||
assert(func);
|
||||
|
||||
return ((this)->*(func))(currentWriter, currentReader, offset);
|
||||
}
|
||||
|
||||
unique_ptr<Resolver>
|
||||
skipper(const NodePtr &writer) {
|
||||
|
||||
typedef unique_ptr<Resolver> (ResolverFactory::*BuilderFunc)(const NodePtr &writer);
|
||||
|
||||
NodePtr currentWriter = (writer->type() == AVRO_SYMBOLIC) ? writer->leafAt(0) : writer;
|
||||
|
||||
static const BuilderFunc funcs[] = {
|
||||
&ResolverFactory::constructPrimitiveSkipper<std::string>,
|
||||
&ResolverFactory::constructPrimitiveSkipper<std::vector<uint8_t>>,
|
||||
&ResolverFactory::constructPrimitiveSkipper<int32_t>,
|
||||
&ResolverFactory::constructPrimitiveSkipper<int64_t>,
|
||||
&ResolverFactory::constructPrimitiveSkipper<float>,
|
||||
&ResolverFactory::constructPrimitiveSkipper<double>,
|
||||
&ResolverFactory::constructPrimitiveSkipper<bool>,
|
||||
&ResolverFactory::constructPrimitiveSkipper<Null>,
|
||||
&ResolverFactory::constructCompoundSkipper<RecordSkipper>,
|
||||
&ResolverFactory::constructCompoundSkipper<EnumSkipper>,
|
||||
&ResolverFactory::constructCompoundSkipper<ArraySkipper>,
|
||||
&ResolverFactory::constructCompoundSkipper<MapSkipper>,
|
||||
&ResolverFactory::constructCompoundSkipper<UnionSkipper>,
|
||||
&ResolverFactory::constructCompoundSkipper<FixedSkipper>};
|
||||
|
||||
static_assert((sizeof(funcs) / sizeof(BuilderFunc)) == (AVRO_NUM_TYPES),
|
||||
"Invalid number of builder functions");
|
||||
|
||||
BuilderFunc func = funcs[currentWriter->type()];
|
||||
assert(func);
|
||||
|
||||
return ((this)->*(func))(currentWriter);
|
||||
}
|
||||
};
|
||||
|
||||
/// Creates one skipper per leaf of the writer record, in leaf order.
RecordSkipper::RecordSkipper(ResolverFactory &factory, const NodePtr &writer) : Resolver() {
    const size_t fieldCount = writer->leaves();
    resolvers_.reserve(fieldCount);

    size_t field = 0;
    while (field < fieldCount) {
        resolvers_.push_back(factory.skipper(writer->leafAt(field)));
        ++field;
    }
}
|
||||
|
||||
/// Creates one resolver per writer field, in writer order. A writer field
/// whose name exists in the reader is decoded into that reader field's
/// layout; any other writer field is skipped.
RecordParser::RecordParser(ResolverFactory &factory,
                           const NodePtr &writer,
                           const NodePtr &reader,
                           const CompoundLayout &offsets) : Resolver() {
    const size_t writerFields = writer->leaves();
    resolvers_.reserve(writerFields);

    for (size_t field = 0; field != writerFields; ++field) {
        const NodePtr &writerLeaf = writer->leafAt(field);
        const std::string &fieldName = writer->nameAt(field);

        // Look the writer's field up by name on the reader side.
        size_t readerIndex = 0;
        if (!reader->nameIndex(fieldName, readerIndex)) {
            resolvers_.push_back(factory.skipper(writerLeaf));
            continue;
        }

        const NodePtr &readerLeaf = reader->leafAt(readerIndex);
        resolvers_.push_back(factory.construct(writerLeaf, readerLeaf, offsets.at(readerIndex)));
    }
}
|
||||
|
||||
/// The child skipper is built from the writer's leaf 1 (presumably the value
/// schema; keys are read as plain strings in parse()).
MapSkipper::MapSkipper(ResolverFactory &factory, const NodePtr &writer)
    : Resolver(),
      resolver_(factory.skipper(writer->leafAt(1))) {
}
|
||||
|
||||
/// The value resolver pairs leaf 1 of writer and reader with layout entry 1.
/// Layout entry 0 gives the offset of the setter function pointer, and the
/// compound layout's own offset locates the map.
MapParser::MapParser(ResolverFactory &factory,
                     const NodePtr &writer,
                     const NodePtr &reader,
                     const CompoundLayout &offsets)
    : Resolver(),
      resolver_(factory.construct(writer->leafAt(1), reader->leafAt(1), offsets.at(1))),
      offset_(offsets.offset()),
      setFuncOffset_(offsets.at(0).offset()) {
}
|
||||
|
||||
/// The child skipper is built from the writer's leaf 0.
ArraySkipper::ArraySkipper(ResolverFactory &factory, const NodePtr &writer)
    : Resolver(),
      resolver_(factory.skipper(writer->leafAt(0))) {
}
|
||||
|
||||
/// The item resolver pairs leaf 0 of writer and reader with layout entry 1.
/// Layout entry 0 gives the offset of the setter function pointer, and the
/// compound layout's own offset locates the array.
ArrayParser::ArrayParser(ResolverFactory &factory,
                         const NodePtr &writer,
                         const NodePtr &reader,
                         const CompoundLayout &offsets)
    : Resolver(),
      resolver_(factory.construct(writer->leafAt(0), reader->leafAt(0), offsets.at(1))),
      offset_(offsets.offset()),
      setFuncOffset_(offsets.at(0).offset()) {
}
|
||||
|
||||
/// Creates one skipper per branch of the writer union, in branch order.
UnionSkipper::UnionSkipper(ResolverFactory &factory, const NodePtr &writer) : Resolver() {
    const size_t branchCount = writer->leaves();
    resolvers_.reserve(branchCount);

    size_t branch = 0;
    while (branch < branchCount) {
        resolvers_.push_back(factory.skipper(writer->leafAt(branch)));
        ++branch;
    }
}
|
||||
|
||||
namespace {
|
||||
|
||||
// assumes the writer is NOT a union, and the reader IS a union
|
||||
|
||||
SchemaResolution
|
||||
checkUnionMatch(const NodePtr &writer, const NodePtr &reader, size_t &index) {
|
||||
SchemaResolution bestMatch = RESOLVE_NO_MATCH;
|
||||
|
||||
index = 0;
|
||||
size_t leaves = reader->leaves();
|
||||
|
||||
for (size_t i = 0; i < leaves; ++i) {
|
||||
|
||||
const NodePtr &leaf = reader->leafAt(i);
|
||||
SchemaResolution newMatch = writer->resolve(*leaf);
|
||||
|
||||
if (newMatch == RESOLVE_MATCH) {
|
||||
bestMatch = newMatch;
|
||||
index = i;
|
||||
break;
|
||||
}
|
||||
if (bestMatch == RESOLVE_NO_MATCH) {
|
||||
bestMatch = newMatch;
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
|
||||
return bestMatch;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/// For every writer branch, picks the reader branch via checkUnionMatch().
/// Matching branches get a real resolver using layout entry `index + 2`
/// (entries 0 and 1 hold the choice and setter offsets); branches without a
/// match get a skipper and an out-of-range choice value.
UnionParser::UnionParser(ResolverFactory &factory,
                         const NodePtr &writer,
                         const NodePtr &reader,
                         const CompoundLayout &offsets)
    : Resolver(),
      offset_(offsets.offset()),
      choiceOffset_(offsets.at(0).offset()),
      setFuncOffset_(offsets.at(1).offset()) {

    const size_t writerBranches = writer->leaves();
    resolvers_.reserve(writerBranches);
    choiceMapping_.reserve(writerBranches);

    for (size_t branch = 0; branch != writerBranches; ++branch) {
        // for each writer, we need a schema match for the reader
        const NodePtr &writerLeaf = writer->leafAt(branch);
        size_t readerBranch = 0;

        const SchemaResolution match = checkUnionMatch(writerLeaf, reader, readerBranch);

        if (match != RESOLVE_NO_MATCH) {
            const NodePtr &readerLeaf = reader->leafAt(readerBranch);
            resolvers_.push_back(factory.construct(writerLeaf, readerLeaf, offsets.at(readerBranch + 2)));
            choiceMapping_.push_back(readerBranch);
        } else {
            resolvers_.push_back(factory.skipper(writerLeaf));
            // push back a non-sense number
            choiceMapping_.push_back(reader->leaves());
        }
    }
}
|
||||
|
||||
/// Picks the reader branch for the non-union writer with checkUnionMatch()
/// and builds the resolver for that branch using layout entry `choice_ + 2`
/// (entries 0 and 1 hold the choice and setter offsets).
NonUnionToUnionParser::NonUnionToUnionParser(ResolverFactory &factory,
                                             const NodePtr &writer,
                                             const NodePtr &reader,
                                             const CompoundLayout &offsets)
    : Resolver(),
      choice_(0),
      offset_(offsets.offset()),
      choiceOffset_(offsets.at(0).offset()),
      setFuncOffset_(offsets.at(1).offset()) {
    // The result is only inspected by the assert; the cast to void keeps
    // NDEBUG builds free of an unused-variable warning.
    const SchemaResolution bestMatch = checkUnionMatch(writer, reader, choice_);
    assert(bestMatch != RESOLVE_NO_MATCH);
    (void) bestMatch;

    resolver_ = factory.construct(writer, reader->leafAt(choice_), offsets.at(choice_ + 2));
}
|
||||
|
||||
/// Creates one resolver per writer branch; every branch is resolved against
/// the same non-union reader node and the same layout.
UnionToNonUnionParser::UnionToNonUnionParser(ResolverFactory &factory,
                                             const NodePtr &writer,
                                             const NodePtr &reader,
                                             const Layout &offsets) : Resolver() {
    const size_t branchCount = writer->leaves();
    resolvers_.reserve(branchCount);

    size_t branch = 0;
    while (branch < branchCount) {
        resolvers_.push_back(factory.construct(writer->leafAt(branch), reader, offsets));
        ++branch;
    }
}
|
||||
|
||||
/// Entry point: builds the resolver tree that decodes data written with
/// `writerSchema` into the memory layout `readerLayout` of `readerSchema`.
unique_ptr<Resolver> constructResolver(const ValidSchema &writerSchema,
                                       const ValidSchema &readerSchema,
                                       const Layout &readerLayout) {
    // The factory is only needed while the tree is being built.
    ResolverFactory builder;
    const NodePtr &writerRoot = writerSchema.root();
    const NodePtr &readerRoot = readerSchema.root();
    return builder.construct(writerRoot, readerRoot, readerLayout);
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ResolverSchema.hh"
|
||||
#include "Resolver.hh"
|
||||
#include "ValidSchema.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// Builds the resolver for the writer/reader schema pair once, up front.
ResolverSchema::ResolverSchema(const ValidSchema &writerSchema,
                               const ValidSchema &readerSchema,
                               const Layout &readerLayout)
    : resolver_(constructResolver(writerSchema, readerSchema, readerLayout)) {
}
|
||||
|
||||
/// Decodes one value from `reader` into the object at `address` by
/// delegating to the resolver built in the constructor.
void ResolverSchema::parse(Reader &reader, uint8_t *address) {
    const auto &root = *resolver_;
    root.parse(reader, address);
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "CustomAttributes.hh"
|
||||
#include "Schema.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// Creates an empty record schema named `name`.
RecordSchema::RecordSchema(const std::string &name) : Schema(new NodeRecord) {
    const Name recordName(name);
    node_->setName(recordName);
}
|
||||
|
||||
void RecordSchema::addField(const std::string &name, const Schema &fieldSchema) {
|
||||
const CustomAttributes emptyCustomAttribute;
|
||||
addField(name, fieldSchema, emptyCustomAttribute);
|
||||
}
|
||||
|
||||
void RecordSchema::addField(const std::string &name, const Schema &fieldSchema, const CustomAttributes &customFields) {
|
||||
// add the name first. it will throw if the name is a duplicate, preventing
|
||||
// the leaf from being added
|
||||
node_->addName(name);
|
||||
|
||||
node_->addLeaf(fieldSchema.root());
|
||||
|
||||
node_->addCustomAttributesForField(customFields);
|
||||
}
|
||||
|
||||
std::string RecordSchema::getDoc() const {
|
||||
return node_->getDoc();
|
||||
}
|
||||
/// Stores `doc` as the documentation string on the record node.
void RecordSchema::setDoc(const std::string &doc) {
    auto &recordNode = *node_;
    recordNode.setDoc(doc);
}
|
||||
|
||||
/// Creates an enum schema named `name` with no symbols yet.
EnumSchema::EnumSchema(const std::string &name) : Schema(new NodeEnum) {
    const Name enumName(name);
    node_->setName(enumName);
}
|
||||
|
||||
/// Appends `symbol` to the enum's list of names.
void EnumSchema::addSymbol(const std::string &symbol) {
    auto &enumNode = *node_;
    enumNode.addName(symbol);
}
|
||||
|
||||
/// Creates an array schema whose items have schema `itemsSchema`.
ArraySchema::ArraySchema(const Schema &itemsSchema) : Schema(new NodeArray) {
    const NodePtr &itemsRoot = itemsSchema.root();
    node_->addLeaf(itemsRoot);
}
|
||||
|
||||
/// Note: although this has the signature of a copy constructor, it does not
/// copy. It creates a new array node whose item schema is the argument's
/// root, i.e. an array of the given array.
ArraySchema::ArraySchema(const ArraySchema &itemsSchema) : Schema(new NodeArray) {
    const NodePtr &itemsRoot = itemsSchema.root();
    node_->addLeaf(itemsRoot);
}
|
||||
|
||||
/// Creates a map schema; the root of `valuesSchema` is added as a leaf of the
/// new map node.
MapSchema::MapSchema(const Schema &valuesSchema) : Schema(new NodeMap) {
    const NodePtr &valuesRoot = valuesSchema.root();
    node_->addLeaf(valuesRoot);
}
|
||||
|
||||
/// Note: although this has the signature of a copy constructor, it does not
/// copy. It creates a new map node and adds the argument's root as a leaf,
/// i.e. a map of the given map.
MapSchema::MapSchema(const MapSchema &valuesSchema) : Schema(new NodeMap) {
    const NodePtr &valuesRoot = valuesSchema.root();
    node_->addLeaf(valuesRoot);
}
|
||||
|
||||
/// Creates a union schema with no branches; add them with addType().
UnionSchema::UnionSchema()
    : Schema(new NodeUnion) {
}
|
||||
|
||||
void UnionSchema::addType(const Schema &typeSchema) {
|
||||
if (typeSchema.type() == AVRO_UNION) {
|
||||
throw Exception("Cannot add unions to unions");
|
||||
}
|
||||
|
||||
if (typeSchema.type() == AVRO_RECORD) {
|
||||
// check for duplicate records
|
||||
size_t types = node_->leaves();
|
||||
for (size_t i = 0; i < types; ++i) {
|
||||
const NodePtr &leaf = node_->leafAt(i);
|
||||
// TODO, more checks?
|
||||
if (leaf->type() == AVRO_RECORD && leaf->name() == typeSchema.root()->name()) {
|
||||
throw Exception("Records in unions cannot have duplicate names");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node_->addLeaf(typeSchema.root());
|
||||
}
|
||||
|
||||
/// Creates a fixed schema named `name` holding exactly `size` bytes.
///
/// Throws Exception when `size` is negative. Previously a negative value was
/// passed on to the node unchecked.
FixedSchema::FixedSchema(int size, const std::string &name) : Schema(new NodeFixed) {
    if (size < 0) {
        throw Exception("Fixed size cannot be negative");
    }
    node_->setFixedSize(size);
    node_->setName(Name(name));
}
|
||||
|
||||
/// Creates a symbolic schema: a NodeSymbolic built from `name` and `link`.
SymbolicSchema::SymbolicSchema(const Name &name, const NodePtr &link)
    : Schema(new NodeSymbolic(HasName(name), link)) {}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,189 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "Stream.hh"
|
||||
#include <vector>
|
||||
|
||||
namespace avro {
|
||||
|
||||
using std::vector;
|
||||
|
||||
class MemoryInputStream : public InputStream {
|
||||
const std::vector<uint8_t *> &data_;
|
||||
const size_t chunkSize_;
|
||||
const size_t size_;
|
||||
const size_t available_;
|
||||
size_t cur_;
|
||||
size_t curLen_;
|
||||
|
||||
size_t maxLen() {
|
||||
size_t n = (cur_ == (size_ - 1)) ? available_ : chunkSize_;
|
||||
if (n == curLen_) {
|
||||
if (cur_ == (size_ - 1)) {
|
||||
return 0;
|
||||
}
|
||||
++cur_;
|
||||
n = (cur_ == (size_ - 1)) ? available_ : chunkSize_;
|
||||
curLen_ = 0;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
public:
|
||||
MemoryInputStream(const std::vector<uint8_t *> &b,
|
||||
size_t chunkSize, size_t available) : data_(b), chunkSize_(chunkSize), size_(b.size()),
|
||||
available_(available), cur_(0), curLen_(0) {}
|
||||
|
||||
bool next(const uint8_t **data, size_t *len) final {
|
||||
if (size_t n = maxLen()) {
|
||||
*data = data_[cur_] + curLen_;
|
||||
*len = n - curLen_;
|
||||
curLen_ = n;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void backup(size_t len) final {
|
||||
curLen_ -= len;
|
||||
}
|
||||
|
||||
void skip(size_t len) final {
|
||||
while (len > 0) {
|
||||
if (size_t n = maxLen()) {
|
||||
if ((curLen_ + len) < n) {
|
||||
n = curLen_ + len;
|
||||
}
|
||||
len -= n - curLen_;
|
||||
curLen_ = n;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t byteCount() const final {
|
||||
return cur_ * chunkSize_ + curLen_;
|
||||
}
|
||||
};
|
||||
|
||||
class MemoryInputStream2 : public InputStream {
|
||||
const uint8_t *const data_;
|
||||
const size_t size_;
|
||||
size_t curLen_;
|
||||
|
||||
public:
|
||||
MemoryInputStream2(const uint8_t *data, size_t len)
|
||||
: data_(data), size_(len), curLen_(0) {}
|
||||
|
||||
bool next(const uint8_t **data, size_t *len) final {
|
||||
if (curLen_ == size_) {
|
||||
return false;
|
||||
}
|
||||
*data = &data_[curLen_];
|
||||
*len = size_ - curLen_;
|
||||
curLen_ = size_;
|
||||
return true;
|
||||
}
|
||||
|
||||
void backup(size_t len) final {
|
||||
curLen_ -= len;
|
||||
}
|
||||
|
||||
void skip(size_t len) final {
|
||||
if (len > (size_ - curLen_)) {
|
||||
len = size_ - curLen_;
|
||||
}
|
||||
curLen_ += len;
|
||||
}
|
||||
|
||||
size_t byteCount() const final {
|
||||
return curLen_;
|
||||
}
|
||||
};
|
||||
|
||||
class MemoryOutputStream final : public OutputStream {
|
||||
public:
|
||||
const size_t chunkSize_;
|
||||
std::vector<uint8_t *> data_;
|
||||
size_t available_;
|
||||
size_t byteCount_;
|
||||
|
||||
explicit MemoryOutputStream(size_t chunkSize) : chunkSize_(chunkSize),
|
||||
available_(0), byteCount_(0) {}
|
||||
~MemoryOutputStream() final {
|
||||
for (std::vector<uint8_t *>::const_iterator it = data_.begin();
|
||||
it != data_.end(); ++it) {
|
||||
delete[] *it;
|
||||
}
|
||||
}
|
||||
|
||||
bool next(uint8_t **data, size_t *len) final {
|
||||
if (available_ == 0) {
|
||||
data_.push_back(new uint8_t[chunkSize_]);
|
||||
available_ = chunkSize_;
|
||||
}
|
||||
*data = &data_.back()[chunkSize_ - available_];
|
||||
*len = available_;
|
||||
byteCount_ += available_;
|
||||
available_ = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
void backup(size_t len) final {
|
||||
available_ += len;
|
||||
byteCount_ -= len;
|
||||
}
|
||||
|
||||
uint64_t byteCount() const final {
|
||||
return byteCount_;
|
||||
}
|
||||
|
||||
void flush() final {}
|
||||
};
|
||||
|
||||
std::unique_ptr<OutputStream> memoryOutputStream(size_t chunkSize) {
|
||||
return std::unique_ptr<OutputStream>(new MemoryOutputStream(chunkSize));
|
||||
}
|
||||
|
||||
std::unique_ptr<InputStream> memoryInputStream(const uint8_t *data, size_t len) {
|
||||
return std::unique_ptr<InputStream>(new MemoryInputStream2(data, len));
|
||||
}
|
||||
|
||||
std::unique_ptr<InputStream> memoryInputStream(const OutputStream &source) {
|
||||
const auto &mos =
|
||||
dynamic_cast<const MemoryOutputStream &>(source);
|
||||
return (mos.data_.empty()) ? std::unique_ptr<InputStream>(new MemoryInputStream2(nullptr, 0)) : std::unique_ptr<InputStream>(new MemoryInputStream(mos.data_, mos.chunkSize_, (mos.chunkSize_ - mos.available_)));
|
||||
}
|
||||
|
||||
std::shared_ptr<std::vector<uint8_t>> snapshot(const OutputStream &source) {
|
||||
const auto &mos =
|
||||
dynamic_cast<const MemoryOutputStream &>(source);
|
||||
std::shared_ptr<std::vector<uint8_t>> result(new std::vector<uint8_t>());
|
||||
size_t c = mos.byteCount_;
|
||||
result->reserve(mos.byteCount_);
|
||||
for (auto it = mos.data_.begin();
|
||||
it != mos.data_.end(); ++it) {
|
||||
size_t n = std::min(c, mos.chunkSize_);
|
||||
std::copy(*it, *it + n, std::back_inserter(*result));
|
||||
c -= n;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "Types.hh"
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
namespace avro {
|
||||
namespace strings {
|
||||
const std::string typeToString[] = {
|
||||
"string",
|
||||
"bytes",
|
||||
"int",
|
||||
"long",
|
||||
"float",
|
||||
"double",
|
||||
"boolean",
|
||||
"null",
|
||||
"record",
|
||||
"enum",
|
||||
"array",
|
||||
"map",
|
||||
"union",
|
||||
"fixed",
|
||||
"symbolic"};
|
||||
|
||||
static_assert((sizeof(typeToString) / sizeof(std::string)) == (AVRO_NUM_TYPES + 1),
|
||||
"Incorrect Avro typeToString");
|
||||
|
||||
} // namespace strings
|
||||
|
||||
// this static assert exists because a 32 bit integer is used as a bit-flag for each type,
|
||||
// and it would be a problem for this flag if we ever supported more than 32 types
|
||||
static_assert(AVRO_NUM_TYPES < 32, "Too many Avro types");
|
||||
|
||||
/// Returns the printable name of `type`, or "Undefined type" when the value
/// is not accepted by isAvroTypeOrPseudoType().
const std::string &toString(Type type) noexcept {
    static std::string undefinedType = "Undefined type";
    if (!isAvroTypeOrPseudoType(type)) {
        return undefinedType;
    }
    return strings::typeToString[type];
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, Type type) {
|
||||
if (isAvroTypeOrPseudoType(type)) {
|
||||
os << strings::typeToString[type];
|
||||
} else {
|
||||
os << static_cast<int>(type);
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
/// Streams a fixed placeholder text for an Avro null value.
std::ostream &operator<<(std::ostream &os, const Null &) {
    return os << "(null value)";
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,178 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <cctype>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
#include "Node.hh"
|
||||
#include "Schema.hh"
|
||||
#include "ValidSchema.hh"
|
||||
|
||||
using std::make_pair;
|
||||
using std::ostringstream;
|
||||
using std::shared_ptr;
|
||||
using std::static_pointer_cast;
|
||||
using std::string;
|
||||
|
||||
namespace avro {
|
||||
using SymbolMap = std::map<Name, NodePtr>;
|
||||
|
||||
static bool validate(const NodePtr &node, SymbolMap &symbolMap) {
|
||||
if (!node->isValid()) {
|
||||
throw Exception("Schema is invalid, due to bad node of type {}", node->type());
|
||||
}
|
||||
|
||||
if (node->hasName()) {
|
||||
const Name &nm = node->name();
|
||||
// FIXME: replace "find" with "lower_bound". The author seems to have intended
|
||||
// "lower_bound" here because of (1) the check for the contents of the iterator
|
||||
// that follows and (2) use of the iterator in insert later in the code.
|
||||
auto it = symbolMap.find(nm);
|
||||
auto found = it != symbolMap.end() && nm == it->first;
|
||||
|
||||
if (node->type() == AVRO_SYMBOLIC) {
|
||||
if (!found) {
|
||||
throw Exception("Symbolic name \"{}\" is unknown", node->name());
|
||||
}
|
||||
|
||||
shared_ptr<NodeSymbolic> symNode =
|
||||
static_pointer_cast<NodeSymbolic>(node);
|
||||
|
||||
// if the symbolic link is already resolved, we return true,
|
||||
// otherwise returning false will force it to be resolved
|
||||
return symNode->isSet();
|
||||
}
|
||||
|
||||
if (found) {
|
||||
return false;
|
||||
}
|
||||
symbolMap.insert(it, make_pair(nm, node));
|
||||
}
|
||||
|
||||
node->lock();
|
||||
size_t leaves = node->leaves();
|
||||
for (size_t i = 0; i < leaves; ++i) {
|
||||
const NodePtr &leaf(node->leafAt(i));
|
||||
|
||||
if (!validate(leaf, symbolMap)) {
|
||||
|
||||
// if validate returns false it means a node with this name already
|
||||
// existed in the map, instead of keeping this node twice in the
|
||||
// map (which could potentially create circular shared pointer
|
||||
// links that would not be freed), replace this node with a
|
||||
// symbolic link to the original one.
|
||||
|
||||
node->setLeafToSymbolic(i, symbolMap.find(leaf->name())->second);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void validate(const NodePtr &p) {
|
||||
SymbolMap m;
|
||||
validate(p, m);
|
||||
}
|
||||
|
||||
/// Takes ownership of `root` and validates the tree it heads.
ValidSchema::ValidSchema(NodePtr root)
    : root_(std::move(root)) {
    validate(root_);
}
|
||||
|
||||
/// Adopts the root node of `schema` and validates the tree it heads.
ValidSchema::ValidSchema(const Schema &schema)
    : root_(schema.root()) {
    validate(root_);
}
|
||||
|
||||
/// Default schema: the root is taken from a NullSchema and then validated.
ValidSchema::ValidSchema()
    : root_(NullSchema().root()) {
    validate(root_);
}
|
||||
|
||||
/// Replaces the current root with the root of `schema`, then validates it.
void ValidSchema::setSchema(const Schema &schema) {
    root_ = schema.root();
    validate(root_);
}
|
||||
|
||||
/// Writes the JSON form of the schema to `os`, followed by a newline.
void ValidSchema::toJson(std::ostream &os) const {
    root_->printJson(os, 0);
    os << '\n';
}
|
||||
|
||||
/// Returns the JSON form of the schema as a string. With `prettyPrint` set
/// the text is returned as printed; otherwise it is passed through
/// compactSchema() first.
string ValidSchema::toJson(bool prettyPrint) const {
    ostringstream buffer;
    toJson(buffer);
    if (prettyPrint) {
        return buffer.str();
    }
    return compactSchema(buffer.str());
}
|
||||
|
||||
/// Writes the basic-info listing of the schema tree to `os`.
void ValidSchema::toFlatList(std::ostream &os) const {
    root_->printBasicInfo(os);
}
|
||||
|
||||
/*
|
||||
* compactSchema compacts and returns a formatted string representation
|
||||
* of a ValidSchema object by removing the whitespaces outside of the quoted
|
||||
* field names and values. It can handle the cases where the quoted value is
|
||||
* in UTF-8 format. Note that this method is not responsible for validating
|
||||
* the schema.
|
||||
*/
|
||||
string ValidSchema::compactSchema(const string &schema) {
|
||||
auto insideQuote = false;
|
||||
size_t newPos = 0;
|
||||
string data = schema;
|
||||
|
||||
for (auto c : schema) {
|
||||
if (!insideQuote && std::isspace(c)) {
|
||||
// Skip the white spaces outside quotes.
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == '\"') {
|
||||
// It is valid for a quote to be part of the value for some fields,
|
||||
// e.g., the "doc" field. In that case, the quote is expected to be
|
||||
// escaped inside the schema. Since the escape character '\\' could
|
||||
// be escaped itself, we need to check whether there are an even
|
||||
// number of consecutive slashes prior to the quote.
|
||||
auto leadingSlashes = 0;
|
||||
for (int i = static_cast<int>(newPos) - 1; i >= 0; i--) {
|
||||
if (data[i] == '\\') {
|
||||
leadingSlashes++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (leadingSlashes % 2 == 0) {
|
||||
// Found a real quote which identifies either the start or the
|
||||
// end of a field name or value.
|
||||
insideQuote = !insideQuote;
|
||||
}
|
||||
}
|
||||
data[newPos++] = c;
|
||||
}
|
||||
if (insideQuote) {
|
||||
throw Exception("Schema is not well formed with mismatched quotes");
|
||||
}
|
||||
if (newPos < schema.size()) {
|
||||
data.resize(newPos);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,253 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "NodeImpl.hh"
|
||||
#include "ValidSchema.hh"
|
||||
#include "Validator.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// Stores the schema, resets the state and prepares the validator for the
/// schema's root node.
Validator::Validator(ValidSchema schema)
    : schema_(std::move(schema)),
      nextType_(AVRO_NULL),
      expectedTypesFlag_(0),
      compoundStarted_(false),
      waitingForCount_(false),
      count_(0) {
    setupOperation(schema_.root());
}
|
||||
|
||||
/// Puts the validator in the state where the next value must be a count:
/// the count is reset and an int or a long is accepted.
void Validator::setWaitingForCount() {
    count_ = 0;
    waitingForCount_ = true;
    nextType_ = AVRO_LONG;
    expectedTypesFlag_ = typeToFlag(AVRO_INT) | typeToFlag(AVRO_LONG);
}
|
||||
|
||||
void Validator::enumAdvance() {
|
||||
if (compoundStarted_) {
|
||||
setWaitingForCount();
|
||||
compoundStarted_ = false;
|
||||
} else {
|
||||
waitingForCount_ = false;
|
||||
compoundStack_.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
bool Validator::countingSetup() {
|
||||
auto proceed = true;
|
||||
if (compoundStarted_) {
|
||||
setWaitingForCount();
|
||||
compoundStarted_ = false;
|
||||
proceed = false;
|
||||
} else if (waitingForCount_) {
|
||||
waitingForCount_ = false;
|
||||
if (count_ == 0) {
|
||||
compoundStack_.pop_back();
|
||||
proceed = false;
|
||||
} else {
|
||||
counters_.push_back(count_);
|
||||
}
|
||||
}
|
||||
|
||||
return proceed;
|
||||
}
|
||||
|
||||
void Validator::countingAdvance() {
|
||||
if (countingSetup()) {
|
||||
size_t index = (compoundStack_.back().pos)++;
|
||||
const NodePtr &node = compoundStack_.back().node;
|
||||
|
||||
if (index < node->leaves()) {
|
||||
setupOperation(node->leafAt(index));
|
||||
} else {
|
||||
compoundStack_.back().pos = 0;
|
||||
size_t count = --counters_.back();
|
||||
if (count == 0) {
|
||||
counters_.pop_back();
|
||||
compoundStarted_ = true;
|
||||
nextType_ = node->type();
|
||||
expectedTypesFlag_ = typeToFlag(nextType_);
|
||||
} else {
|
||||
index = (compoundStack_.back().pos)++;
|
||||
setupOperation(node->leafAt(index));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Validator::unionAdvance() {
|
||||
if (compoundStarted_) {
|
||||
setWaitingForCount();
|
||||
compoundStarted_ = false;
|
||||
} else {
|
||||
waitingForCount_ = false;
|
||||
NodePtr node = compoundStack_.back().node;
|
||||
|
||||
if (count_ < node->leaves()) {
|
||||
compoundStack_.pop_back();
|
||||
setupOperation(node->leafAt(static_cast<int>(count_)));
|
||||
} else {
|
||||
throw Exception(
|
||||
"Union selection out of range, got {}, expecting 0-{}",
|
||||
count_, node->leaves() - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Advance step for a fixed: leaves it by popping it off the compound stack.
void Validator::fixedAdvance() {
    compoundStack_.pop_back();
    compoundStarted_ = false;
}
|
||||
|
||||
size_t Validator::nextSizeExpected() const {
|
||||
return compoundStack_.back().node->fixedSize();
|
||||
}
|
||||
|
||||
void Validator::doAdvance() {
|
||||
using AdvanceFunc = void (Validator::*)();
|
||||
|
||||
// only the compound types need advance functions here
|
||||
static const AdvanceFunc funcs[] = {
|
||||
nullptr, // string
|
||||
nullptr, // bytes
|
||||
nullptr, // int
|
||||
nullptr, // long
|
||||
nullptr, // float
|
||||
nullptr, // double
|
||||
nullptr, // bool
|
||||
nullptr, // null
|
||||
&Validator::countingAdvance, // Record is treated like counting with count == 1
|
||||
&Validator::enumAdvance,
|
||||
&Validator::countingAdvance,
|
||||
&Validator::countingAdvance,
|
||||
&Validator::unionAdvance,
|
||||
&Validator::fixedAdvance};
|
||||
static_assert((sizeof(funcs) / sizeof(AdvanceFunc)) == (AVRO_NUM_TYPES),
|
||||
"Invalid number of advance functions");
|
||||
|
||||
expectedTypesFlag_ = 0;
|
||||
// loop until we encounter a next expected type, or we've exited all compound types
|
||||
while (!expectedTypesFlag_ && !compoundStack_.empty()) {
|
||||
|
||||
Type type = compoundStack_.back().node->type();
|
||||
|
||||
AdvanceFunc func = funcs[type];
|
||||
|
||||
// only compound functions are put on the status stack so it is ok to
|
||||
// assume that func is not null
|
||||
assert(func);
|
||||
|
||||
((this)->*(func))();
|
||||
}
|
||||
|
||||
if (compoundStack_.empty()) {
|
||||
nextType_ = AVRO_NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void Validator::advance() {
|
||||
if (!waitingForCount_) {
|
||||
doAdvance();
|
||||
}
|
||||
}
|
||||
|
||||
void Validator::setCount(size_t count) {
|
||||
if (!waitingForCount_) {
|
||||
throw Exception("Not expecting count");
|
||||
}
|
||||
count_ = count;
|
||||
|
||||
doAdvance();
|
||||
}
|
||||
|
||||
/// Sets the flag word of acceptable types for a schema node of type `type`.
void Validator::setupFlag(Type type) {
    // Flags rather than exact types, so that related types can be accepted:
    // string and bytes accept each other, and a long also accepts an int
    // (but an int does not accept a long).
    static const flag_t acceptedFor[] = {
        typeToFlag(AVRO_STRING) | typeToFlag(AVRO_BYTES), // string
        typeToFlag(AVRO_STRING) | typeToFlag(AVRO_BYTES), // bytes
        typeToFlag(AVRO_INT),                             // int
        typeToFlag(AVRO_INT) | typeToFlag(AVRO_LONG),     // long
        typeToFlag(AVRO_FLOAT),
        typeToFlag(AVRO_DOUBLE),
        typeToFlag(AVRO_BOOL),
        typeToFlag(AVRO_NULL),
        typeToFlag(AVRO_RECORD),
        typeToFlag(AVRO_ENUM),
        typeToFlag(AVRO_ARRAY),
        typeToFlag(AVRO_MAP),
        typeToFlag(AVRO_UNION),
        typeToFlag(AVRO_FIXED)};
    static_assert((sizeof(acceptedFor) / sizeof(acceptedFor[0])) == (AVRO_NUM_TYPES),
                  "Invalid number of avro type flags");

    expectedTypesFlag_ = acceptedFor[type];
}
|
||||
|
||||
void Validator::setupOperation(const NodePtr &node) {
|
||||
nextType_ = node->type();
|
||||
|
||||
if (nextType_ == AVRO_SYMBOLIC) {
|
||||
NodePtr actualNode = resolveSymbol(node);
|
||||
assert(actualNode);
|
||||
setupOperation(actualNode);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(nextType_ < AVRO_SYMBOLIC);
|
||||
|
||||
setupFlag(nextType_);
|
||||
|
||||
if (!isPrimitive(nextType_)) {
|
||||
compoundStack_.emplace_back(node);
|
||||
compoundStarted_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
bool Validator::getCurrentRecordName(std::string &name) const {
|
||||
auto found = false;
|
||||
name.clear();
|
||||
|
||||
// if the top of the stack is a record I want this record name
|
||||
auto idx = static_cast<int>(compoundStack_.size() - ((!compoundStack_.empty() && (isPrimitive(nextType_) || nextType_ == AVRO_RECORD)) ? 1 : 2));
|
||||
|
||||
if (idx >= 0 && compoundStack_[idx].node->type() == AVRO_RECORD) {
|
||||
name = compoundStack_[idx].node->name().simpleName();
|
||||
found = true;
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
bool Validator::getNextFieldName(std::string &name) const {
|
||||
auto found = false;
|
||||
name.clear();
|
||||
auto idx = static_cast<int>(compoundStack_.size() - (isCompound(nextType_) ? 2 : 1));
|
||||
if (idx >= 0 && compoundStack_[idx].node->type() == AVRO_RECORD) {
|
||||
size_t pos = compoundStack_[idx].pos - 1;
|
||||
const NodePtr &node = compoundStack_[idx].node;
|
||||
if (pos < node->leaves()) {
|
||||
name = node->nameAt(pos);
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "Zigzag.hh"
|
||||
|
||||
namespace avro {
|
||||
// TODO: The following two functions have exactly the same code except for the type.
|
||||
// They should be implemented as a template.
|
||||
size_t
|
||||
encodeInt64(int64_t input, std::array<uint8_t, 10> &output) noexcept {
|
||||
auto val = encodeZigzag64(input);
|
||||
|
||||
// put values in an array of bytes with variable length encoding
|
||||
const int mask = 0x7F;
|
||||
auto v = val & mask;
|
||||
size_t bytesOut = 0;
|
||||
while (val >>= 7) {
|
||||
output[bytesOut++] = static_cast<uint8_t>(v | 0x80);
|
||||
v = val & mask;
|
||||
}
|
||||
|
||||
output[bytesOut++] = static_cast<uint8_t>(v);
|
||||
return bytesOut;
|
||||
}
|
||||
size_t
|
||||
encodeInt32(int32_t input, std::array<uint8_t, 5> &output) noexcept {
|
||||
auto val = encodeZigzag32(input);
|
||||
|
||||
// put values in an array of bytes with variable length encoding
|
||||
const int mask = 0x7F;
|
||||
auto v = val & mask;
|
||||
size_t bytesOut = 0;
|
||||
while (val >>= 7) {
|
||||
output[bytesOut++] = static_cast<uint8_t>(v | 0x80);
|
||||
v = val & mask;
|
||||
}
|
||||
|
||||
output[bytesOut++] = static_cast<uint8_t>(v);
|
||||
return bytesOut;
|
||||
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,873 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <cctype>
|
||||
#ifndef _WIN32
|
||||
#include <ctime>
|
||||
#endif
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include <boost/random/mersenne_twister.hpp>
|
||||
#include <utility>
|
||||
|
||||
#include "Compiler.hh"
|
||||
#include "NodeImpl.hh"
|
||||
#include "ValidSchema.hh"
|
||||
|
||||
using avro::NodePtr;
|
||||
using avro::resolveSymbol;
|
||||
using std::ifstream;
|
||||
using std::map;
|
||||
using std::ofstream;
|
||||
using std::ostream;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
using boost::lexical_cast;
|
||||
|
||||
using avro::compileJsonSchema;
|
||||
using avro::ValidSchema;
|
||||
|
||||
struct PendingSetterGetter {
|
||||
string structName;
|
||||
string type;
|
||||
string name;
|
||||
size_t idx;
|
||||
|
||||
PendingSetterGetter(string sn, string t, string n, size_t i) : structName(std::move(sn)), type(std::move(t)), name(std::move(n)), idx(i) {}
|
||||
};
|
||||
|
||||
struct PendingConstructor {
|
||||
string structName;
|
||||
string memberName;
|
||||
bool initMember;
|
||||
PendingConstructor(string sn, string n, bool im) : structName(std::move(sn)), memberName(std::move(n)), initMember(im) {}
|
||||
};
|
||||
|
||||
class CodeGen {
|
||||
size_t unionNumber_;
|
||||
std::ostream &os_;
|
||||
bool inNamespace_;
|
||||
const std::string ns_;
|
||||
const std::string schemaFile_;
|
||||
const std::string headerFile_;
|
||||
const std::string includePrefix_;
|
||||
const bool noUnion_;
|
||||
const std::string guardString_;
|
||||
boost::mt19937 random_;
|
||||
|
||||
vector<PendingSetterGetter> pendingGettersAndSetters;
|
||||
vector<PendingConstructor> pendingConstructors;
|
||||
|
||||
map<NodePtr, string> done;
|
||||
set<NodePtr> doing;
|
||||
|
||||
std::string guard();
|
||||
std::string fullname(const string &name) const;
|
||||
std::string generateEnumType(const NodePtr &n);
|
||||
std::string cppTypeOf(const NodePtr &n);
|
||||
std::string generateRecordType(const NodePtr &n);
|
||||
std::string unionName();
|
||||
std::string generateUnionType(const NodePtr &n);
|
||||
std::string generateType(const NodePtr &n);
|
||||
std::string generateDeclaration(const NodePtr &n);
|
||||
std::string doGenerateType(const NodePtr &n);
|
||||
void generateEnumTraits(const NodePtr &n);
|
||||
void generateTraits(const NodePtr &n);
|
||||
void generateRecordTraits(const NodePtr &n);
|
||||
void generateUnionTraits(const NodePtr &n);
|
||||
void emitCopyright();
|
||||
void emitGeneratedWarning();
|
||||
|
||||
public:
|
||||
CodeGen(std::ostream &os, std::string ns,
|
||||
std::string schemaFile, std::string headerFile,
|
||||
std::string guardString,
|
||||
std::string includePrefix, bool noUnion) : unionNumber_(0), os_(os), inNamespace_(false), ns_(std::move(ns)),
|
||||
schemaFile_(std::move(schemaFile)), headerFile_(std::move(headerFile)),
|
||||
includePrefix_(std::move(includePrefix)), noUnion_(noUnion),
|
||||
guardString_(std::move(guardString)),
|
||||
random_(static_cast<uint32_t>(::time(nullptr))) {
|
||||
}
|
||||
|
||||
void generate(const ValidSchema &schema);
|
||||
};
|
||||
|
||||
/// Returns `name` with an underscore appended when it is exactly one of the
/// reserved words listed below, so that it can be used as an identifier in
/// generated code. Any other name is returned unchanged.
static std::string decorate(const std::string &name) {
    static const char *const cppReservedWords[] = {
        "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break",
        "case", "catch", "char", "char8_t", "char16_t", "char32_t", "class", "compl", "concept",
        "const", "consteval", "constexpr", "constinit", "const_cast", "continue", "co_await", "co_return",
        "co_yield", "decltype", "default", "delete", "do", "double", "dynamic_cast", "else",
        "enum", "explicit", "export", "extern", "false", "float", "for", "friend", "goto", "if",
        "import", "inline", "int", "long", "module", "mutable", "namespace", "new", "noexcept", "not",
        "not_eq", "nullptr", "operator", "or", "or_eq", "private", "protected", "public", "reflexpr",
        "register", "reinterpret_cast", "requires", "return", "short", "signed", "sizeof", "static",
        "static_assert", "static_cast", "struct", "switch", "synchronized", "template", "this",
        "thread_local", "throw", "true", "try", "typedef", "typeid", "typename", "union", "unsigned",
        "using", "virtual", "void", "volatile", "wchar_t", "while", "xor", "xor_eq"};

    // std::string's own comparison is used, so <cstring> is not needed.
    for (const char *reserved : cppReservedWords) {
        if (name == reserved) {
            return name + '_';
        }
    }
    return name;
}
|
||||
|
||||
/// Applies decorate() to the simple name of `name`.
static string decorate(const avro::Name &name) {
    const std::string &simple = name.simpleName();
    return decorate(simple);
}
|
||||
|
||||
/// Returns `name` qualified with the configured namespace, or `name` itself
/// when no namespace was configured.
string CodeGen::fullname(const string &name) const {
    if (ns_.empty()) {
        return name;
    }
    return ns_ + "::" + name;
}
|
||||
|
||||
/// Emits an `enum class` definition for the enum node `n`, with one
/// enumerator per symbol, and returns the decorated enum name.
string CodeGen::generateEnumType(const NodePtr &n) {
    string enumName = decorate(n->name());

    os_ << "enum class " << enumName << ": unsigned {\n";
    const size_t symbolCount = n->names();
    for (size_t sym = 0; sym != symbolCount; ++sym) {
        os_ << " " << decorate(n->nameAt(sym)) << ",\n";
    }
    os_ << "};\n\n";

    return enumName;
}
|
||||
|
||||
/// Returns the spelling of the C++ type used for the node `n` in generated
/// code. Types not handled below yield "$Undefined$".
string CodeGen::cppTypeOf(const NodePtr &n) {
    switch (n->type()) {
        // Primitive types map to fixed spellings.
        case avro::AVRO_NULL: return "avro::null";
        case avro::AVRO_BOOL: return "bool";
        case avro::AVRO_INT: return "int32_t";
        case avro::AVRO_LONG: return "int64_t";
        case avro::AVRO_FLOAT: return "float";
        case avro::AVRO_DOUBLE: return "double";
        case avro::AVRO_STRING: return "std::string";
        case avro::AVRO_BYTES: return "std::vector<uint8_t>";

        // Records and enums use their decorated name, qualified with the
        // namespace unless the generator is currently inside it.
        case avro::AVRO_RECORD:
        case avro::AVRO_ENUM: {
            string decorated = decorate(n->name());
            if (inNamespace_) {
                return decorated;
            }
            return fullname(decorated);
        }

        // Containers wrap the type of their value leaf.
        case avro::AVRO_ARRAY: {
            const string itemType = cppTypeOf(n->leafAt(0));
            return "std::vector<" + itemType + " >";
        }
        case avro::AVRO_MAP: {
            const string valueType = cppTypeOf(n->leafAt(1));
            return "std::map<std::string, " + valueType + " >";
        }
        case avro::AVRO_FIXED: {
            const string byteCount = lexical_cast<string>(n->fixedSize());
            return "std::array<uint8_t, " + byteCount + ">";
        }

        case avro::AVRO_SYMBOLIC:
            return cppTypeOf(resolveSymbol(n));

        // Unions use the name recorded for them in `done`.
        case avro::AVRO_UNION:
            return fullname(done[n]);

        default:
            return "$Undefined$";
    }
}
|
||||
|
||||
/// Returns a short name for the node `n`: a fixed word for primitives, arrays
/// and maps, the decorated name for records, enums and fixed types, and the
/// name of the resolved node for symbolic nodes. Anything else yields
/// "$Undefined$".
static string cppNameOf(const NodePtr &n) {
    switch (n->type()) {
        case avro::AVRO_NULL: return "null";
        case avro::AVRO_STRING: return "string";
        case avro::AVRO_BYTES: return "bytes";
        case avro::AVRO_INT: return "int";
        case avro::AVRO_LONG: return "long";
        case avro::AVRO_FLOAT: return "float";
        case avro::AVRO_DOUBLE: return "double";
        case avro::AVRO_BOOL: return "bool";
        case avro::AVRO_ARRAY: return "array";
        case avro::AVRO_MAP: return "map";

        case avro::AVRO_RECORD:
        case avro::AVRO_ENUM:
        case avro::AVRO_FIXED:
            return decorate(n->name());

        case avro::AVRO_SYMBOLIC:
            return cppNameOf(resolveSymbol(n));

        default:
            return "$Undefined$";
    }
}
|
||||
|
||||
/// Emits a struct definition for the record node `n` and returns its
/// decorated name. The field types are generated first; if the record is
/// found in `done` after that, the recorded name is returned and nothing
/// more is emitted.
string CodeGen::generateRecordType(const NodePtr &n) {
    const size_t fieldCount = n->leaves();
    string decoratedName = decorate(n->name());

    vector<string> types;
    for (size_t f = 0; f < fieldCount; ++f) {
        types.push_back(generateType(n->leafAt(f)));
    }

    const auto known = done.find(n);
    if (known != done.end()) {
        return known->second;
    }

    os_ << "struct " << decoratedName << " {\n";

    // Typedefs for union fields and for the items of arrays of unions.
    if (!noUnion_) {
        for (size_t f = 0; f < fieldCount; ++f) {
            const NodePtr &field = n->leafAt(f);
            if (field->type() == avro::AVRO_UNION) {
                os_ << " typedef " << types[f]
                    << ' ' << n->nameAt(f) << "_t;\n";
                types[f] = n->nameAt(f) + "_t";
            }
            if (field->type() == avro::AVRO_ARRAY && field->leafAt(0)->type() == avro::AVRO_UNION) {
                os_ << " typedef " << types[f] << "::value_type"
                    << ' ' << n->nameAt(f) << "_item_t;\n";
            }
        }
    }

    // Data members. nameAt() does not account for C++ reserved words, hence
    // the decorate() call.
    for (size_t f = 0; f < fieldCount; ++f) {
        const std::string member = decorate(n->nameAt(f));
        os_ << " " << types[f];
        os_ << ' ' << member << ";\n";
    }

    // Default constructor that value-initialises every member.
    os_ << " " << decoratedName << "()";
    if (fieldCount > 0) {
        os_ << " :";
    }
    os_ << "\n";
    for (size_t f = 0; f < fieldCount; ++f) {
        const std::string member = decorate(n->nameAt(f));
        os_ << " " << member << "(";
        os_ << types[f];
        os_ << "())";
        if (f + 1 != fieldCount) {
            os_ << ',';
        }
        os_ << "\n";
    }
    os_ << " { }\n";
    os_ << "};\n\n";

    return decoratedName;
}
|
||||
|
||||
/**
 * Rewrites `s` in place so that it is usable as part of a C++ identifier:
 * letters are kept (upper-cased when `foldCase` is true), digits are kept,
 * and every other character becomes '_'.
 */
void makeCanonical(std::string &s, bool foldCase) {
    for (char &c : s) {
        // The <cctype> classifiers are undefined for negative values, which
        // a plain char holds for non-ASCII bytes; classify as unsigned char.
        const unsigned char uc = static_cast<unsigned char>(c);
        if (isalpha(uc)) {
            if (foldCase) {
                c = static_cast<char>(toupper(uc));
            }
        } else if (!isdigit(uc)) {
            c = '_';
        }
    }
}
|
||||
|
||||
/**
 * Builds a fresh name for a generated union struct from the schema file
 * name and a running counter (incremented on every call).
 */
string CodeGen::unionName() {
    string base = schemaFile_;

    // Drop the directory part; the separator itself stays at the front
    // and is turned into '_' by makeCanonical.
    const string::size_type sep = base.find_last_of("/\\");
    if (sep != string::npos) {
        base.erase(0, sep);
    }
    makeCanonical(base, false);

    const string ordinal = boost::lexical_cast<string>(unionNumber_++);
    return base + "_Union__" + ordinal + "__";
}
|
||||
|
||||
/**
 * Writes the out-of-class inline getter and setter for one union branch.
 *
 * @param os         stream receiving the generated code
 * @param structName name of the generated union struct
 * @param type       C++ type of the branch
 * @param name       branch name used in get_<name> / set_<name>
 * @param idx        index of the branch within the union
 */
static void generateGetterAndSetter(std::ostream &os,
                                    const std::string &structName, const std::string &type, const std::string &name,
                                    size_t idx) {
    const std::string scope = " " + structName + "::";

    // Getter: throws when the union currently holds a different branch.
    os << "inline\n";
    os << type << scope << "get_" << name << "() const {\n";
    os << " if (idx_ != " << idx << ") {\n";
    os << " throw avro::Exception(\"Invalid type for "
       << "union " << structName << "\");\n";
    os << " }\n";
    os << " return std::any_cast<" << type << " >(value_);\n";
    os << "}\n\n";

    // Setter: records the branch index and stores the value.
    os << "inline\n";
    os << "void" << scope << "set_" << name
       << "(const " << type << "& v) {\n";
    os << " idx_ = " << idx << ";\n";
    os << " value_ = v;\n";
    os << "}\n\n";
}
|
||||
|
||||
/**
 * Writes the out-of-class inline default constructor of a union struct.
 * The index starts at 0; when `initMember` is true the stored value is
 * initialized with a value-constructed `type`.
 */
static void generateConstructor(std::ostream &os,
                                const std::string &structName, bool initMember,
                                const std::string &type) {
    std::string initializers = " : idx_(0)";
    if (initMember) {
        initializers += ", value_(" + type + "())";
    }
    os << "inline " << structName << "::" << structName << "()"
       << initializers << " { }\n";
}
|
||||
|
||||
/**
|
||||
* Generates a type for union and emits the code.
|
||||
* Since unions can encounter names that are not fully defined yet,
|
||||
* such names must be declared and the inline functions deferred until all
|
||||
* types are fully defined.
|
||||
*/
|
||||
string CodeGen::generateUnionType(const NodePtr &n) {
|
||||
size_t c = n->leaves();
|
||||
vector<string> types;
|
||||
vector<string> names;
|
||||
|
||||
auto it = doing.find(n);
|
||||
if (it != doing.end()) {
|
||||
for (size_t i = 0; i < c; ++i) {
|
||||
const NodePtr &nn = n->leafAt(i);
|
||||
types.push_back(generateDeclaration(nn));
|
||||
names.push_back(cppNameOf(nn));
|
||||
}
|
||||
} else {
|
||||
doing.insert(n);
|
||||
for (size_t i = 0; i < c; ++i) {
|
||||
const NodePtr &nn = n->leafAt(i);
|
||||
types.push_back(generateType(nn));
|
||||
names.push_back(cppNameOf(nn));
|
||||
}
|
||||
doing.erase(n);
|
||||
}
|
||||
if (done.find(n) != done.end()) {
|
||||
return done[n];
|
||||
}
|
||||
|
||||
auto result = unionName();
|
||||
|
||||
os_ << "struct " << result << " {\n"
|
||||
<< "private:\n"
|
||||
<< " size_t idx_;\n"
|
||||
<< " std::any value_;\n"
|
||||
<< "public:\n"
|
||||
<< " size_t idx() const { return idx_; }\n";
|
||||
|
||||
for (size_t i = 0; i < c; ++i) {
|
||||
const NodePtr &nn = n->leafAt(i);
|
||||
if (nn->type() == avro::AVRO_NULL) {
|
||||
os_ << " bool is_null() const {\n"
|
||||
<< " return (idx_ == " << i << ");\n"
|
||||
<< " }\n"
|
||||
<< " void set_null() {\n"
|
||||
<< " idx_ = " << i << ";\n"
|
||||
<< " value_ = std::any();\n"
|
||||
<< " }\n";
|
||||
} else {
|
||||
const string &type = types[i];
|
||||
const string &name = names[i];
|
||||
os_ << " " << type << " get_" << name << "() const;\n"
|
||||
" void set_"
|
||||
<< name << "(const " << type << "& v);\n";
|
||||
pendingGettersAndSetters.emplace_back(result, type, name, i);
|
||||
}
|
||||
}
|
||||
|
||||
os_ << " " << result << "();\n";
|
||||
pendingConstructors.emplace_back(result, types[0],
|
||||
n->leafAt(0)->type() != avro::AVRO_NULL);
|
||||
os_ << "};\n\n";
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the type for the given schema node and emits code to os.
|
||||
*/
|
||||
string CodeGen::generateType(const NodePtr &n) {
|
||||
NodePtr nn = (n->type() == avro::AVRO_SYMBOLIC) ? resolveSymbol(n) : n;
|
||||
|
||||
map<NodePtr, string>::const_iterator it = done.find(nn);
|
||||
if (it != done.end()) {
|
||||
return it->second;
|
||||
}
|
||||
string result = doGenerateType(nn);
|
||||
done[nn] = result;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Generates the C++ type for an already-resolved node, without caching.
 * Primitives and fixed map straight to cppTypeOf(); containers recurse
 * into their element type; named compound types delegate to their
 * dedicated generators. Anything else yields "$Undefined$".
 */
string CodeGen::doGenerateType(const NodePtr &n) {
    const auto kind = n->type();
    switch (kind) {
        case avro::AVRO_STRING:
        case avro::AVRO_BYTES:
        case avro::AVRO_INT:
        case avro::AVRO_LONG:
        case avro::AVRO_FLOAT:
        case avro::AVRO_DOUBLE:
        case avro::AVRO_BOOL:
        case avro::AVRO_NULL:
        case avro::AVRO_FIXED:
            return cppTypeOf(n);

        case avro::AVRO_ARRAY:
        case avro::AVRO_MAP: {
            // Arrays keep their element schema at leaf 0, maps at leaf 1.
            const bool isArray = (kind == avro::AVRO_ARRAY);
            const NodePtr &element = n->leafAt(isArray ? 0 : 1);

            string elementType;
            if (doing.find(n) != doing.end()) {
                // Already generating this container: declare, don't define.
                elementType = generateDeclaration(element);
            } else {
                doing.insert(n);
                elementType = generateType(element);
                doing.erase(n);
            }
            const string opener = isArray ? "std::vector<" : "std::map<std::string, ";
            return opener + elementType + " >";
        }

        case avro::AVRO_RECORD:
            return generateRecordType(n);
        case avro::AVRO_ENUM:
            return generateEnumType(n);
        case avro::AVRO_UNION:
            return generateUnionType(n);

        default:
            return "$Undefined$";
    }
}
|
||||
|
||||
/**
 * Returns the C++ type name for a node that may not be fully defined yet.
 * For records only a forward declaration is emitted; enums and unions are
 * generated through their regular generators.
 */
string CodeGen::generateDeclaration(const NodePtr &n) {
    NodePtr node = n;
    if (n->type() == avro::AVRO_SYMBOLIC) {
        node = resolveSymbol(n);
    }

    switch (node->type()) {
        case avro::AVRO_RECORD: {
            const string recordType = cppTypeOf(node);
            os_ << "struct " << recordType << ";\n";
            return recordType;
        }
        case avro::AVRO_ARRAY:
            return "std::vector<" + generateDeclaration(node->leafAt(0)) + " >";
        case avro::AVRO_MAP:
            return "std::map<std::string, " + generateDeclaration(node->leafAt(1)) + " >";
        case avro::AVRO_ENUM:
            return generateEnumType(node);
        case avro::AVRO_UNION:
            // FIXME: When can this happen?
            return generateUnionType(node);
        case avro::AVRO_STRING:
        case avro::AVRO_BYTES:
        case avro::AVRO_INT:
        case avro::AVRO_LONG:
        case avro::AVRO_FLOAT:
        case avro::AVRO_DOUBLE:
        case avro::AVRO_BOOL:
        case avro::AVRO_NULL:
        case avro::AVRO_FIXED:
            return cppTypeOf(node);
        default:
            return "$Undefined$";
    }
}
|
||||
|
||||
void CodeGen::generateEnumTraits(const NodePtr &n) {
|
||||
string dname = decorate(n->name());
|
||||
string fn = fullname(dname);
|
||||
|
||||
// the nameAt(i) does not take c++ reserved words into account
|
||||
// so we need to call decorate on it
|
||||
string last = decorate(n->nameAt(n->names() - 1));
|
||||
|
||||
os_ << "template<> struct codec_traits<" << fn << "> {\n"
|
||||
<< " static void encode(Encoder& e, " << fn << " v) {\n"
|
||||
<< " if (v > " << fn << "::" << last << ")\n"
|
||||
<< " {\n"
|
||||
<< " std::ostringstream error;\n"
|
||||
<< R"( error << "enum value " << static_cast<unsigned>(v) << " is out of bound for )" << fn
|
||||
<< " and cannot be encoded\";\n"
|
||||
<< " throw avro::Exception(error.str());\n"
|
||||
<< " }\n"
|
||||
<< " e.encodeEnum(static_cast<size_t>(v));\n"
|
||||
<< " }\n"
|
||||
<< " static void decode(Decoder& d, " << fn << "& v) {\n"
|
||||
<< " size_t index = d.decodeEnum();\n"
|
||||
<< " if (index > static_cast<size_t>(" << fn << "::" << last << "))\n"
|
||||
<< " {\n"
|
||||
<< " std::ostringstream error;\n"
|
||||
<< R"( error << "enum value " << index << " is out of bound for )" << fn
|
||||
<< " and cannot be decoded\";\n"
|
||||
<< " throw avro::Exception(error.str());\n"
|
||||
<< " }\n"
|
||||
<< " v = static_cast<" << fn << ">(index);\n"
|
||||
<< " }\n"
|
||||
<< "};\n\n";
|
||||
}
|
||||
|
||||
void CodeGen::generateRecordTraits(const NodePtr &n) {
|
||||
size_t c = n->leaves();
|
||||
for (size_t i = 0; i < c; ++i) {
|
||||
generateTraits(n->leafAt(i));
|
||||
}
|
||||
|
||||
string fn = fullname(decorate(n->name()));
|
||||
os_ << "template<> struct codec_traits<" << fn << "> {\n";
|
||||
|
||||
if (c == 0) {
|
||||
os_ << " static void encode(Encoder&, const " << fn << "&) {}\n";
|
||||
// ResolvingDecoder::fieldOrder mutates the state of the decoder, so if that decoder is
|
||||
// passed in, we need to call the method even though it will return an empty vector.
|
||||
os_ << " static void decode(Decoder& d, " << fn << "&) {\n";
|
||||
os_ << " if (avro::ResolvingDecoder *rd = dynamic_cast<avro::ResolvingDecoder *>(&d)) {\n";
|
||||
os_ << " rd->fieldOrder();\n";
|
||||
os_ << " }\n";
|
||||
os_ << " }\n";
|
||||
os_ << "};\n";
|
||||
return;
|
||||
}
|
||||
|
||||
os_ << " static void encode(Encoder& e, const " << fn << "& v) {\n";
|
||||
|
||||
for (size_t i = 0; i < c; ++i) {
|
||||
// the nameAt(i) does not take c++ reserved words into account
|
||||
// so we need to call decorate on it
|
||||
std::string decoratedNameAt = decorate(n->nameAt(i));
|
||||
os_ << " avro::encode(e, v." << decoratedNameAt << ");\n";
|
||||
}
|
||||
|
||||
os_ << " }\n"
|
||||
<< " static void decode(Decoder& d, " << fn << "& v) {\n";
|
||||
os_ << " if (avro::ResolvingDecoder *rd =\n";
|
||||
os_ << " dynamic_cast<avro::ResolvingDecoder *>(&d)) {\n";
|
||||
os_ << " const std::vector<size_t> fo = rd->fieldOrder();\n";
|
||||
os_ << " for (std::vector<size_t>::const_iterator it = fo.begin();\n";
|
||||
os_ << " it != fo.end(); ++it) {\n";
|
||||
os_ << " switch (*it) {\n";
|
||||
for (size_t i = 0; i < c; ++i) {
|
||||
// the nameAt(i) does not take c++ reserved words into account
|
||||
// so we need to call decorate on it
|
||||
std::string decoratedNameAt = decorate(n->nameAt(i));
|
||||
os_ << " case " << i << ":\n";
|
||||
os_ << " avro::decode(d, v." << decoratedNameAt << ");\n";
|
||||
os_ << " break;\n";
|
||||
}
|
||||
os_ << " default:\n";
|
||||
os_ << " break;\n";
|
||||
os_ << " }\n";
|
||||
os_ << " }\n";
|
||||
os_ << " } else {\n";
|
||||
|
||||
for (size_t i = 0; i < c; ++i) {
|
||||
// the nameAt(i) does not take c++ reserved words into account
|
||||
// so we need to call decorate on it
|
||||
std::string decoratedNameAt = decorate(n->nameAt(i));
|
||||
os_ << " avro::decode(d, v." << decoratedNameAt << ");\n";
|
||||
}
|
||||
os_ << " }\n";
|
||||
|
||||
os_ << " }\n"
|
||||
<< "};\n\n";
|
||||
}
|
||||
|
||||
void CodeGen::generateUnionTraits(const NodePtr &n) {
|
||||
size_t c = n->leaves();
|
||||
|
||||
for (size_t i = 0; i < c; ++i) {
|
||||
const NodePtr &nn = n->leafAt(i);
|
||||
generateTraits(nn);
|
||||
}
|
||||
|
||||
string name = done[n];
|
||||
string fn = fullname(name);
|
||||
|
||||
os_ << "template<> struct codec_traits<" << fn << "> {\n"
|
||||
<< " static void encode(Encoder& e, " << fn << " v) {\n"
|
||||
<< " e.encodeUnionIndex(v.idx());\n"
|
||||
<< " switch (v.idx()) {\n";
|
||||
|
||||
for (size_t i = 0; i < c; ++i) {
|
||||
const NodePtr &nn = n->leafAt(i);
|
||||
os_ << " case " << i << ":\n";
|
||||
if (nn->type() == avro::AVRO_NULL) {
|
||||
os_ << " e.encodeNull();\n";
|
||||
} else {
|
||||
os_ << " avro::encode(e, v.get_" << cppNameOf(nn)
|
||||
<< "());\n";
|
||||
}
|
||||
os_ << " break;\n";
|
||||
}
|
||||
|
||||
os_ << " }\n"
|
||||
<< " }\n"
|
||||
<< " static void decode(Decoder& d, " << fn << "& v) {\n"
|
||||
<< " size_t n = d.decodeUnionIndex();\n"
|
||||
<< " if (n >= " << c << ") { throw avro::Exception(\""
|
||||
"Union index too big\"); }\n"
|
||||
<< " switch (n) {\n";
|
||||
|
||||
for (size_t i = 0; i < c; ++i) {
|
||||
const NodePtr &nn = n->leafAt(i);
|
||||
os_ << " case " << i << ":\n";
|
||||
if (nn->type() == avro::AVRO_NULL) {
|
||||
os_ << " d.decodeNull();\n"
|
||||
<< " v.set_null();\n";
|
||||
} else {
|
||||
os_ << " {\n"
|
||||
<< " " << cppTypeOf(nn) << " vv;\n"
|
||||
<< " avro::decode(d, vv);\n"
|
||||
<< " v.set_" << cppNameOf(nn) << "(vv);\n"
|
||||
<< " }\n";
|
||||
}
|
||||
os_ << " break;\n";
|
||||
}
|
||||
os_ << " }\n"
|
||||
<< " }\n"
|
||||
<< "};\n\n";
|
||||
}
|
||||
|
||||
void CodeGen::generateTraits(const NodePtr &n) {
|
||||
switch (n->type()) {
|
||||
case avro::AVRO_STRING:
|
||||
case avro::AVRO_BYTES:
|
||||
case avro::AVRO_INT:
|
||||
case avro::AVRO_LONG:
|
||||
case avro::AVRO_FLOAT:
|
||||
case avro::AVRO_DOUBLE:
|
||||
case avro::AVRO_BOOL:
|
||||
case avro::AVRO_NULL:
|
||||
break;
|
||||
case avro::AVRO_RECORD:
|
||||
generateRecordTraits(n);
|
||||
break;
|
||||
case avro::AVRO_ENUM:
|
||||
generateEnumTraits(n);
|
||||
break;
|
||||
case avro::AVRO_ARRAY:
|
||||
case avro::AVRO_MAP:
|
||||
generateTraits(n->leafAt(n->type() == avro::AVRO_ARRAY ? 0 : 1));
|
||||
break;
|
||||
case avro::AVRO_UNION:
|
||||
generateUnionTraits(n);
|
||||
break;
|
||||
case avro::AVRO_FIXED:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Writes the Apache License header comment at the top of the generated file.
 */
void CodeGen::emitCopyright() {
    static const char *const licenseHeader =
        "/**\n"
        " * Licensed to the Apache Software Foundation (ASF) under one\n"
        " * or more contributor license agreements. See the NOTICE file\n"
        " * distributed with this work for additional information\n"
        " * regarding copyright ownership. The ASF licenses this file\n"
        " * to you under the Apache License, Version 2.0 (the\n"
        " * \"License\"); you may not use this file except in compliance\n"
        " * with the License. You may obtain a copy of the License at\n"
        " *\n"
        " * https://www.apache.org/licenses/LICENSE-2.0\n"
        " *\n"
        " * Unless required by applicable law or agreed to in writing, software\n"
        " * distributed under the License is distributed on an \"AS IS\" BASIS,\n"
        " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n"
        " * See the License for the specific language governing permissions and\n"
        " * limitations under the License.\n"
        " */\n\n";
    os_ << licenseHeader;
}
|
||||
|
||||
/**
 * Writes the "generated by avrogencpp <version>" notice into the output.
 */
void CodeGen::emitGeneratedWarning() {
    os_ << "/* This code was generated by avrogencpp ";
    os_ << AVRO_VERSION;
    os_ << ". Do not edit.*/\n\n";
}
|
||||
|
||||
/**
 * Builds an include-guard macro name from the header file name (made
 * canonical and upper-cased) plus a value drawn from random_.
 */
string CodeGen::guard() {
    string macro = headerFile_;
    makeCanonical(macro, true);
    macro += "_";
    macro += lexical_cast<string>(random_());
    macro += "_H";
    return macro;
}
|
||||
|
||||
void CodeGen::generate(const ValidSchema &schema) {
|
||||
emitCopyright();
|
||||
emitGeneratedWarning();
|
||||
|
||||
string h = guardString_.empty() ? guard() : guardString_;
|
||||
|
||||
os_ << "#ifndef " << h << "\n";
|
||||
os_ << "#define " << h << "\n\n\n";
|
||||
|
||||
os_ << "#include <sstream>\n"
|
||||
<< "#include <any>\n"
|
||||
<< "#include \"" << includePrefix_ << "Specific.hh\"\n"
|
||||
<< "#include \"" << includePrefix_ << "Encoder.hh\"\n"
|
||||
<< "#include \"" << includePrefix_ << "Decoder.hh\"\n"
|
||||
<< "\n";
|
||||
|
||||
if (!ns_.empty()) {
|
||||
os_ << "namespace " << ns_ << " {\n";
|
||||
inNamespace_ = true;
|
||||
}
|
||||
|
||||
const NodePtr &root = schema.root();
|
||||
generateType(root);
|
||||
|
||||
for (vector<PendingSetterGetter>::const_iterator it =
|
||||
pendingGettersAndSetters.begin();
|
||||
it != pendingGettersAndSetters.end(); ++it) {
|
||||
generateGetterAndSetter(os_, it->structName, it->type, it->name,
|
||||
it->idx);
|
||||
}
|
||||
|
||||
for (vector<PendingConstructor>::const_iterator it =
|
||||
pendingConstructors.begin();
|
||||
it != pendingConstructors.end(); ++it) {
|
||||
generateConstructor(os_, it->structName,
|
||||
it->initMember, it->memberName);
|
||||
}
|
||||
|
||||
if (!ns_.empty()) {
|
||||
inNamespace_ = false;
|
||||
os_ << "}\n";
|
||||
}
|
||||
|
||||
os_ << "namespace avro {\n";
|
||||
|
||||
unionNumber_ = 0;
|
||||
|
||||
generateTraits(root);
|
||||
|
||||
os_ << "}\n";
|
||||
|
||||
os_ << "#endif\n";
|
||||
os_.flush();
|
||||
}
|
||||
|
||||
namespace po = boost::program_options;
|
||||
|
||||
/**
 * Scans an existing header for its include guard, i.e. an
 * "#ifndef NAME" line followed by "#define NAME".
 *
 * @param filename path of the header to scan; an unreadable file yields
 *                 no lines and therefore no guard
 * @return the guard macro name, or an empty string when no confirmed
 *         guard was found
 */
static string readGuard(const string &filename) {
    std::ifstream ifs(filename.c_str());
    string buf;
    string candidate;
    while (std::getline(ifs, buf)) {
        boost::algorithm::trim(buf);
        if (candidate.empty()) {
            if (boost::algorithm::starts_with(buf, "#ifndef ")) {
                candidate = buf.substr(8);
            }
        } else if (boost::algorithm::starts_with(buf, "#define ")) {
            if (candidate == buf.substr(8)) {
                // The "#ifndef" is confirmed by a matching "#define".
                return candidate;
            }
        } else {
            // Something else followed the "#ifndef": not a guard.
            candidate.erase();
        }
    }
    // End of file without a matching "#define": do not report a pending,
    // unconfirmed "#ifndef" name as the guard.
    return string();
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
const string NS("namespace");
|
||||
const string OUT_FILE("output");
|
||||
const string IN_FILE("input");
|
||||
const string INCLUDE_PREFIX("include-prefix");
|
||||
const string NO_UNION_TYPEDEF("no-union-typedef");
|
||||
|
||||
po::options_description desc("Allowed options");
|
||||
// clang-format off
|
||||
desc.add_options()
|
||||
("help,h", "produce help message")
|
||||
("version,V", "produce version information")
|
||||
("include-prefix,p", po::value<string>()->default_value("avro"), "prefix for include headers, - for none, default: avro")
|
||||
("no-union-typedef,U", "do not generate typedefs for unions in records")
|
||||
("namespace,n", po::value<string>(), "set namespace for generated code")
|
||||
("input,i", po::value<string>(), "input file")
|
||||
("output,o", po::value<string>(), "output file to generate");
|
||||
// clang-format on
|
||||
|
||||
po::variables_map vm;
|
||||
po::store(po::parse_command_line(argc, argv, desc), vm);
|
||||
po::notify(vm);
|
||||
|
||||
if (vm.count("help")) {
|
||||
std::cout << desc << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (vm.count("version")) {
|
||||
std::cout << AVRO_VERSION << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (vm.count(IN_FILE) == 0 || vm.count(OUT_FILE) == 0) {
|
||||
std::cout << desc << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
string ns = vm.count(NS) > 0 ? vm[NS].as<string>() : string();
|
||||
string outf = vm.count(OUT_FILE) > 0 ? vm[OUT_FILE].as<string>() : string();
|
||||
string inf = vm.count(IN_FILE) > 0 ? vm[IN_FILE].as<string>() : string();
|
||||
string incPrefix = vm[INCLUDE_PREFIX].as<string>();
|
||||
bool noUnion = vm.count(NO_UNION_TYPEDEF) != 0;
|
||||
|
||||
if (incPrefix == "-") {
|
||||
incPrefix.clear();
|
||||
} else if (*incPrefix.rbegin() != '/') {
|
||||
incPrefix += "/";
|
||||
}
|
||||
|
||||
try {
|
||||
ValidSchema schema;
|
||||
|
||||
if (!inf.empty()) {
|
||||
ifstream in(inf.c_str());
|
||||
compileJsonSchema(in, schema);
|
||||
} else {
|
||||
compileJsonSchema(std::cin, schema);
|
||||
}
|
||||
|
||||
if (!outf.empty()) {
|
||||
string g = readGuard(outf);
|
||||
ofstream out(outf.c_str());
|
||||
CodeGen(out, ns, inf, outf, g, incPrefix, noUnion).generate(schema);
|
||||
} else {
|
||||
CodeGen(std::cout, ns, inf, outf, "", incPrefix, noUnion).generate(schema);
|
||||
}
|
||||
return 0;
|
||||
} catch (std::exception &e) {
|
||||
std::cerr << "Failed to parse or compile schema: "
|
||||
<< e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,181 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "JsonDom.hh"
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "JsonIO.hh"
|
||||
#include "Stream.hh"
|
||||
|
||||
namespace avro {
|
||||
namespace json {
|
||||
/**
 * Returns the lower-case JSON type name for an EntityType, or "unknown"
 * for a value outside the enumeration.
 */
const char *typeToString(EntityType t) {
    if (t == EntityType::Null) {
        return "null";
    }
    if (t == EntityType::Bool) {
        return "bool";
    }
    if (t == EntityType::Long) {
        return "long";
    }
    if (t == EntityType::Double) {
        return "double";
    }
    if (t == EntityType::String) {
        return "string";
    }
    if (t == EntityType::Arr) {
        return "array";
    }
    if (t == EntityType::Obj) {
        return "object";
    }
    return "unknown";
}
|
||||
|
||||
/**
 * Reads one JSON value from the parser and returns it as an Entity.
 * Arrays and objects are read recursively. Any token that cannot start a
 * value raises std::domain_error carrying the token's name.
 */
Entity readEntity(JsonParser &p) {
    const JsonParser::Token upcoming = p.peek();

    if (upcoming == JsonParser::Token::Null) {
        p.advance();
        return Entity(p.line());
    }
    if (upcoming == JsonParser::Token::Bool) {
        p.advance();
        return Entity(p.boolValue(), p.line());
    }
    if (upcoming == JsonParser::Token::Long) {
        p.advance();
        return Entity(p.longValue(), p.line());
    }
    if (upcoming == JsonParser::Token::Double) {
        p.advance();
        return Entity(p.doubleValue(), p.line());
    }
    if (upcoming == JsonParser::Token::String) {
        p.advance();
        return Entity(std::make_shared<String>(p.rawString()), p.line());
    }
    if (upcoming == JsonParser::Token::ArrayStart) {
        // Containers report the line on which they start.
        const size_t startLine = p.line();
        p.advance();
        std::shared_ptr<Array> items = std::make_shared<Array>();
        while (p.peek() != JsonParser::Token::ArrayEnd) {
            items->push_back(readEntity(p));
        }
        p.advance();
        return Entity(items, startLine);
    }
    if (upcoming == JsonParser::Token::ObjectStart) {
        const size_t startLine = p.line();
        p.advance();
        std::shared_ptr<Object> members = std::make_shared<Object>();
        while (p.peek() != JsonParser::Token::ObjectEnd) {
            p.advance();
            std::string key = p.stringValue();
            Entity value = readEntity(p);
            // insert() keeps the first value when a key is repeated.
            members->insert(std::make_pair(key, value));
        }
        p.advance();
        return Entity(members, startLine);
    }

    throw std::domain_error(JsonParser::toString(p.peek()));
}
|
||||
|
||||
/** Parses a NUL-terminated JSON text into an Entity. */
Entity loadEntity(const char *text) {
    const size_t length = ::strlen(text);
    const uint8_t *bytes = reinterpret_cast<const uint8_t *>(text);
    return loadEntity(bytes, length);
}

/** Parses the JSON value available on the given input stream. */
Entity loadEntity(InputStream &in) {
    JsonParser parser;
    parser.init(in);
    return readEntity(parser);
}

/** Parses `len` bytes of JSON text starting at `text`. */
Entity loadEntity(const uint8_t *text, size_t len) {
    std::unique_ptr<InputStream> source = memoryInputStream(text, len);
    return loadEntity(*source);
}
|
||||
|
||||
/**
 * Serializes an Entity through the given JSON generator, recursing into
 * array elements and object members.
 */
void writeEntity(JsonGenerator<JsonNullFormatter> &g, const Entity &n) {
    switch (n.type()) {
        case EntityType::Null: {
            g.encodeNull();
            break;
        }
        case EntityType::Bool: {
            g.encodeBool(n.boolValue());
            break;
        }
        case EntityType::Long: {
            g.encodeNumber(n.longValue());
            break;
        }
        case EntityType::Double: {
            g.encodeNumber(n.doubleValue());
            break;
        }
        case EntityType::String: {
            g.encodeString(n.stringValue());
            break;
        }
        case EntityType::Arr: {
            g.arrayStart();
            const Array &elements = n.arrayValue();
            for (const auto &element : elements) {
                writeEntity(g, element);
            }
            g.arrayEnd();
            break;
        }
        case EntityType::Obj: {
            g.objectStart();
            const Object &members = n.objectValue();
            for (const auto &member : members) {
                // Each member is written as its key followed by its value.
                g.encodeString(member.first);
                writeEntity(g, member.second);
            }
            g.objectEnd();
            break;
        }
    }
}
|
||||
|
||||
/**
 * Verifies that this entity holds a value of the requested type and
 * throws Exception naming both the expected and the actual type otherwise.
 */
void Entity::ensureType(EntityType type) const {
    if (type_ == type) {
        return;
    }
    throw Exception("Invalid type. Expected \"{}\" actual {}", typeToString(type), typeToString(type_));
}
|
||||
|
||||
/**
 * Returns the stored string after conversion by
 * JsonParser::toStringValue. Throws if the entity is not a string.
 */
String Entity::stringValue() const {
    ensureType(EntityType::String);
    const std::shared_ptr<String> *held = boost::any_cast<std::shared_ptr<String>>(&value_);
    return JsonParser::toStringValue(**held);
}
|
||||
|
||||
/**
 * Returns the stored string after conversion by
 * JsonParser::toBytesValue. Throws if the entity is not a string.
 */
String Entity::bytesValue() const {
    ensureType(EntityType::String);
    const std::shared_ptr<String> *held = boost::any_cast<std::shared_ptr<String>>(&value_);
    return JsonParser::toBytesValue(**held);
}
|
||||
|
||||
std::string Entity::toString() const {
|
||||
std::unique_ptr<OutputStream> out = memoryOutputStream();
|
||||
JsonGenerator<JsonNullFormatter> g;
|
||||
g.init(*out);
|
||||
writeEntity(g, *this);
|
||||
g.flush();
|
||||
std::unique_ptr<InputStream> in = memoryInputStream(*out);
|
||||
const uint8_t *p = nullptr;
|
||||
size_t n = 0;
|
||||
size_t c = 0;
|
||||
while (in->next(&p, &n)) {
|
||||
c += n;
|
||||
}
|
||||
std::string result;
|
||||
result.resize(c);
|
||||
c = 0;
|
||||
std::unique_ptr<InputStream> in2 = memoryInputStream(*out);
|
||||
while (in2->next(&p, &n)) {
|
||||
::memcpy(&result[c], p, n);
|
||||
c += n;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace json
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,183 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_json_JsonDom_hh__
|
||||
#define avro_json_JsonDom_hh__
|
||||
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Config.hh"
|
||||
#include "boost/any.hpp"
|
||||
|
||||
namespace avro {
|
||||
|
||||
class AVRO_DECL InputStream;
|
||||
|
||||
namespace json {
|
||||
class Entity;

// C++ representations of the JSON value kinds held by an Entity.
using Bool = bool;
using Long = int64_t;
using Double = double;
using String = std::string;
using Array = std::vector<Entity>;
using Object = std::map<std::string, Entity>;
|
||||
|
||||
class AVRO_DECL JsonParser;
|
||||
class JsonNullFormatter;
|
||||
|
||||
template<typename F = JsonNullFormatter>
|
||||
class AVRO_DECL JsonGenerator;
|
||||
|
||||
/// Kind of JSON value held by an Entity.
enum class EntityType {
    Null,   // JSON null
    Bool,   // true / false
    Long,   // integral number
    Double, // floating-point number
    String, // string
    Arr,    // array
    Obj     // object
};
|
||||
|
||||
const char *typeToString(EntityType t);
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, EntityType et) {
|
||||
return os << typeToString(et);
|
||||
}
|
||||
|
||||
class AVRO_DECL Entity {
|
||||
EntityType type_;
|
||||
boost::any value_;
|
||||
size_t line_; // can't be const else noncopyable...
|
||||
|
||||
void ensureType(EntityType) const;
|
||||
|
||||
public:
|
||||
explicit Entity(size_t line = 0) : type_(EntityType::Null), line_(line) {}
|
||||
// Not explicit because do want implicit conversion
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
explicit Entity(Bool v, size_t line = 0) : type_(EntityType::Bool), value_(v), line_(line) {}
|
||||
// Not explicit because do want implicit conversion
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
explicit Entity(Long v, size_t line = 0) : type_(EntityType::Long), value_(v), line_(line) {}
|
||||
// Not explicit because do want implicit conversion
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
explicit Entity(Double v, size_t line = 0) : type_(EntityType::Double), value_(v), line_(line) {}
|
||||
// Not explicit because do want implicit conversion
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
explicit Entity(const std::shared_ptr<String> &v, size_t line = 0) : type_(EntityType::String), value_(v), line_(line) {}
|
||||
// Not explicit because do want implicit conversion
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
explicit Entity(const std::shared_ptr<Array> &v, size_t line = 0) : type_(EntityType::Arr), value_(v), line_(line) {}
|
||||
// Not explicit because do want implicit conversion
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
explicit Entity(const std::shared_ptr<Object> &v, size_t line = 0) : type_(EntityType::Obj), value_(v), line_(line) {}
|
||||
|
||||
EntityType type() const { return type_; }
|
||||
|
||||
size_t line() const { return line_; }
|
||||
|
||||
Bool boolValue() const {
|
||||
ensureType(EntityType::Bool);
|
||||
return boost::any_cast<Bool>(value_);
|
||||
}
|
||||
|
||||
Long longValue() const {
|
||||
ensureType(EntityType::Long);
|
||||
return boost::any_cast<Long>(value_);
|
||||
}
|
||||
|
||||
Double doubleValue() const {
|
||||
ensureType(EntityType::Double);
|
||||
return boost::any_cast<Double>(value_);
|
||||
}
|
||||
|
||||
String stringValue() const;
|
||||
|
||||
String bytesValue() const;
|
||||
|
||||
const Array &arrayValue() const {
|
||||
ensureType(EntityType::Arr);
|
||||
return **boost::any_cast<std::shared_ptr<Array>>(&value_);
|
||||
}
|
||||
|
||||
const Object &objectValue() const {
|
||||
ensureType(EntityType::Obj);
|
||||
return **boost::any_cast<std::shared_ptr<Object>>(&value_);
|
||||
}
|
||||
|
||||
std::string toString() const;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct type_traits {
|
||||
};
|
||||
|
||||
template<>
|
||||
struct type_traits<bool> {
|
||||
static EntityType type() { return EntityType::Bool; }
|
||||
static const char *name() { return "bool"; }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct type_traits<int64_t> {
|
||||
static EntityType type() { return EntityType::Long; }
|
||||
static const char *name() { return "long"; }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct type_traits<double> {
|
||||
static EntityType type() { return EntityType::Double; }
|
||||
static const char *name() { return "double"; }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct type_traits<std::string> {
|
||||
static EntityType type() { return EntityType::String; }
|
||||
static const char *name() { return "string"; }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct type_traits<std::vector<Entity>> {
|
||||
static EntityType type() { return EntityType::Arr; }
|
||||
static const char *name() { return "array"; }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct type_traits<std::map<std::string, Entity>> {
|
||||
static EntityType type() { return EntityType::Obj; }
|
||||
static const char *name() { return "object"; }
|
||||
};
|
||||
|
||||
AVRO_DECL Entity readEntity(JsonParser &p);
|
||||
|
||||
AVRO_DECL Entity loadEntity(InputStream &in);
|
||||
AVRO_DECL Entity loadEntity(const char *text);
|
||||
AVRO_DECL Entity loadEntity(const uint8_t *text, size_t len);
|
||||
|
||||
void writeEntity(JsonGenerator<JsonNullFormatter> &g, const Entity &n);
|
||||
|
||||
} // namespace json
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,456 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "JsonIO.hh"
|
||||
|
||||
namespace avro {
|
||||
namespace json {
|
||||
|
||||
using std::ostringstream;
|
||||
using std::string;
|
||||
|
||||
const char *const
|
||||
JsonParser::tokenNames[] = {
|
||||
"Null",
|
||||
"Bool",
|
||||
"Integer",
|
||||
"Double",
|
||||
"String",
|
||||
"Array start",
|
||||
"Array end",
|
||||
"Object start",
|
||||
"Object end",
|
||||
};
|
||||
|
||||
char JsonParser::next() {
|
||||
char ch = hasNext ? nextChar : ' ';
|
||||
while (isspace(ch)) {
|
||||
if (ch == '\n') {
|
||||
line_++;
|
||||
}
|
||||
ch = in_.read();
|
||||
}
|
||||
hasNext = false;
|
||||
return ch;
|
||||
}
|
||||
|
||||
/**
 * Advances to the next token and requires it to be `tk`.
 *
 * When a Double is expected two substitutions are accepted:
 *  - the strings "Infinity", "-Infinity" and "NaN" (the current token becomes
 *    Double and dv holds the matching special value);
 *  - a Long token, whose value is copied into dv.
 *
 * Any other mismatch throws an Exception naming the expected and found tokens.
 */
void JsonParser::expectToken(Token tk) {
    if (advance() == tk) {
        return;
    }

    if (tk == Token::Double) {
        const bool isSpecialName =
            cur() == Token::String && (sv == "Infinity" || sv == "-Infinity" || sv == "NaN");
        if (isSpecialName) {
            curToken = Token::Double;
            if (sv == "Infinity") {
                dv = std::numeric_limits<double>::infinity();
            } else if (sv == "-Infinity") {
                dv = -std::numeric_limits<double>::infinity();
            } else {
                dv = std::numeric_limits<double>::quiet_NaN();
            }
            return;
        }
        if (cur() == Token::Long) {
            dv = static_cast<double>(lv);
            return;
        }
    }

    string message("Incorrect token in the stream. Expected: ");
    message += JsonParser::toString(tk);
    message += ", found ";
    message += JsonParser::toString(cur());
    throw Exception(message);
}
|
||||
|
||||
/**
 * Reads the next token from the input and updates the parser state machine.
 *
 * Structural characters are checked against curState first:
 *  - ']' / '}' close the current array / object and restore the enclosing
 *    state from stateStack;
 *  - ',' is only accepted in stArrayN / stObjectN and is followed by a value
 *    (or, inside an object, by the next key);
 *  - ':' is only accepted in stKey.
 * Then the value (or key) starting at the current character is scanned.
 *
 * Throws the Exception built by unexpected() for a character that is not
 * valid in the current state.
 */
JsonParser::Token JsonParser::doAdvance() {
    char ch = next();
    if (ch == ']') {
        if (curState != stArray0 && curState != stArrayN) {
            throw unexpected(ch);
        }
        curState = stateStack.top();
        stateStack.pop();
        return Token::ArrayEnd;
    } else if (ch == '}') {
        if (curState != stObject0 && curState != stObjectN) {
            throw unexpected(ch);
        }
        curState = stateStack.top();
        stateStack.pop();
        return Token::ObjectEnd;
    } else if (ch == ',') {
        if (curState != stObjectN && curState != stArrayN) {
            throw unexpected(ch);
        }
        if (curState == stObjectN) {
            // After a comma inside an object the next token must be a key.
            curState = stObject0;
        }
        ch = next();
    } else if (ch == ':') {
        if (curState != stKey) {
            throw unexpected(ch);
        }
        curState = stObjectN;
        ch = next();
    }

    if (curState == stObject0) {
        // Object keys must be strings.
        if (ch != '"') {
            throw unexpected(ch);
        }
        curState = stKey;
    } else if (curState == stArray0) {
        curState = stArrayN;
    }

    switch (ch) {
        case '[':
            stateStack.push(curState);
            curState = stArray0;
            return Token::ArrayStart;
        case '{':
            stateStack.push(curState);
            curState = stObject0;
            return Token::ObjectStart;
        case '"':
            return tryString();
        case 't':
            bv = true;
            return tryLiteral("rue", 3, Token::Bool);
        case 'f':
            bv = false;
            return tryLiteral("alse", 4, Token::Bool);
        case 'n':
            return tryLiteral("ull", 3, Token::Null);
        default:
            // Cast first: isdigit on a negative plain char is undefined behaviour.
            if (isdigit(static_cast<unsigned char>(ch)) || ch == '-') {
                return tryNumber(ch);
            }
            throw unexpected(ch);
    }
}
|
||||
|
||||
/**
 * Scans a JSON number whose first character `ch` ('-' or a digit) has already
 * been consumed. The text of the number is collected in sv.
 *
 * Recogniser states:
 *   0  after a leading '-'            (needs a digit)
 *   1  after a leading '0'            (accepting, integer)
 *   2  inside the integer digits      (accepting, integer)
 *   3  after '.'                      (needs a digit)
 *   4  inside the fraction digits     (accepting, floating point)
 *   5  after 'e' / 'E'                (needs a sign or a digit)
 *   6  after the exponent sign        (needs a digit)
 *   7  inside the exponent digits     (accepting, floating point)
 *
 * The first character that does not continue the number is kept as the
 * look-ahead (hasNext / nextChar). Returns Token::Long (value in lv) or
 * Token::Double (value in dv). Throws if the input ends, or an unexpected
 * character appears, while the recogniser is in a non-accepting state.
 */
JsonParser::Token JsonParser::tryNumber(char ch) {
    // isdigit requires a value representable as unsigned char; a negative
    // plain char (byte >= 0x80) would be undefined behaviour.
    const auto isDigit = [](char c) {
        return isdigit(static_cast<unsigned char>(c)) != 0;
    };
    const auto isExponentMark = [](char c) {
        return c == 'e' || c == 'E';
    };

    sv.clear();
    sv.push_back(ch);
    hasNext = false;

    int state = (ch == '-') ? 0 : (ch == '0') ? 1 : 2;

    while (in_.hasMore()) {
        ch = in_.read();

        int nextState = -1; // -1: `ch` does not continue the number
        switch (state) {
            case 0:
                if (isDigit(ch)) {
                    nextState = (ch == '0') ? 1 : 2;
                }
                break;
            case 1:
                if (ch == '.') {
                    nextState = 3;
                } else if (isExponentMark(ch)) {
                    nextState = 5;
                }
                break;
            case 2:
                if (isDigit(ch)) {
                    nextState = 2;
                } else if (ch == '.') {
                    nextState = 3;
                } else if (isExponentMark(ch)) {
                    nextState = 5;
                }
                break;
            case 3:
                if (isDigit(ch)) {
                    nextState = 4;
                }
                break;
            case 4:
                if (isDigit(ch)) {
                    nextState = 4;
                } else if (isExponentMark(ch)) {
                    nextState = 5;
                }
                break;
            case 5:
                if (ch == '+' || ch == '-') {
                    nextState = 6;
                } else if (isDigit(ch)) {
                    nextState = 7;
                }
                break;
            case 6:
            case 7:
                if (isDigit(ch)) {
                    nextState = 7;
                }
                break;
            default:
                throw Exception("Unexpected JSON parse state");
        }

        if (nextState < 0) {
            // `ch` belongs to whatever follows the number.
            hasNext = true;
            break;
        }
        sv.push_back(ch);
        state = nextState;
    }

    const bool isInteger = (state == 1 || state == 2);
    const bool isFloating = (state == 4 || state == 7);
    if (!isInteger && !isFloating) {
        if (hasNext) {
            throw unexpected(ch);
        }
        throw Exception("Unexpected EOF");
    }

    if (hasNext) {
        nextChar = ch;
    }
    std::istringstream iss(sv);
    if (isInteger) {
        iss >> lv;
        return Token::Long;
    }
    iss >> dv;
    return Token::Double;
}
|
||||
|
||||
/**
 * Scans a JSON string whose opening '"' has already been consumed and stores
 * its body in sv, up to but not including the closing '"'.
 *
 * Escape sequences are validated but NOT decoded here: they are copied into
 * sv verbatim (backslash included). decodeString() performs the decoding.
 *
 * Throws the Exception built by unexpected() for an unknown escape character
 * or a non-hexadecimal digit inside a \u escape.
 */
JsonParser::Token JsonParser::tryString() {
    sv.clear();
    for (;;) {
        char ch = in_.read();
        if (ch == '"') {
            return Token::String;
        }
        if (ch != '\\') {
            sv.push_back(ch);
            continue;
        }

        ch = in_.read();
        switch (ch) {
            case '"':
            case '\\':
            case '/':
            case 'b':
            case 'f':
            case 'n':
            case 'r':
            case 't':
                sv.push_back('\\');
                sv.push_back(ch);
                break;
            case 'u':
            case 'U': {
                char hex[4];
                in_.readBytes(reinterpret_cast<uint8_t *>(hex), 4);
                sv.push_back('\\');
                sv.push_back(ch);
                for (char c : hex) {
                    // Cast first: isdigit on a negative plain char is undefined behaviour.
                    const bool isHexDigit = isdigit(static_cast<unsigned char>(c))
                        || (c >= 'a' && c <= 'f')
                        || (c >= 'A' && c <= 'F');
                    if (!isHexDigit) {
                        throw unexpected(c);
                    }
                    sv.push_back(c);
                }
                break;
            }
            default:
                throw unexpected(ch);
        }
    }
}
|
||||
|
||||
// Decode the given string and return contents as UTF8-encoded bytes.
|
||||
// The input does not have the enclosing double-quotes.
|
||||
string JsonParser::decodeString(const string &s, bool binary) {
|
||||
string result;
|
||||
auto it = s.cbegin();
|
||||
const auto end = s.cend();
|
||||
const auto readNextByte = [&]() -> char {
|
||||
if (it == end) {
|
||||
throw Exception("Unexpected EOF");
|
||||
}
|
||||
return *it++;
|
||||
};
|
||||
const auto unicodeParse = [&]() {
|
||||
uint32_t n = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
auto c = readNextByte();
|
||||
n *= 16;
|
||||
if (isdigit(c)) {
|
||||
n += c - '0';
|
||||
} else if (c >= 'a' && c <= 'f') {
|
||||
n += c - 'a' + 10;
|
||||
} else if (c >= 'A' && c <= 'F') {
|
||||
n += c - 'A' + 10;
|
||||
} else {
|
||||
throw Exception("Invalid hex character: {}", c);
|
||||
}
|
||||
}
|
||||
return n;
|
||||
};
|
||||
while (it != end) {
|
||||
string::const_iterator startSeq = it;
|
||||
char ch = readNextByte();
|
||||
if (ch == '\\') {
|
||||
ch = readNextByte();
|
||||
switch (ch) {
|
||||
case '"':
|
||||
case '\\':
|
||||
case '/':
|
||||
result.push_back(ch);
|
||||
continue;
|
||||
case 'b':
|
||||
result.push_back('\b');
|
||||
continue;
|
||||
case 'f':
|
||||
result.push_back('\f');
|
||||
continue;
|
||||
case 'n':
|
||||
result.push_back('\n');
|
||||
continue;
|
||||
case 'r':
|
||||
result.push_back('\r');
|
||||
continue;
|
||||
case 't':
|
||||
result.push_back('\t');
|
||||
continue;
|
||||
case 'u':
|
||||
case 'U': {
|
||||
uint32_t n = unicodeParse();
|
||||
if (binary) {
|
||||
if (n > 0xff) {
|
||||
throw Exception("Invalid byte for binary: {}{}", ch, string(startSeq, ++it));
|
||||
} else {
|
||||
result.push_back(static_cast<char>(n));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (n >= 0xd800 && n < 0xdc00) {
|
||||
ch = readNextByte();
|
||||
if (ch != '\\') {
|
||||
throw Exception("Invalid unicode sequence: {}", string(startSeq, it));
|
||||
}
|
||||
ch = readNextByte();
|
||||
if (ch != 'u' && ch != 'U') {
|
||||
throw Exception("Invalid unicode sequence: {}", string(startSeq, it));
|
||||
}
|
||||
uint32_t m = unicodeParse();
|
||||
if (m < 0xdc00 || m > 0xdfff) {
|
||||
throw Exception("Invalid unicode sequence: {}", string(startSeq, it));
|
||||
}
|
||||
n = 0x10000 + (((n - 0xd800) << 10) | (m - 0xdc00));
|
||||
} else if (n >= 0xdc00 && n < 0xdfff) {
|
||||
throw Exception("Invalid unicode sequence: {}", string(startSeq, it));
|
||||
}
|
||||
if (n < 0x80) {
|
||||
result.push_back(static_cast<char>(n));
|
||||
} else if (n < 0x800) {
|
||||
result.push_back(static_cast<char>((n >> 6) | 0xc0));
|
||||
result.push_back(static_cast<char>((n & 0x3f) | 0x80));
|
||||
} else if (n < 0x10000) {
|
||||
result.push_back(static_cast<char>((n >> 12) | 0xe0));
|
||||
result.push_back(static_cast<char>(((n >> 6) & 0x3f) | 0x80));
|
||||
result.push_back(static_cast<char>((n & 0x3f) | 0x80));
|
||||
} else if (n < 0x110000) {
|
||||
result.push_back(static_cast<char>((n >> 18) | 0xf0));
|
||||
result.push_back(static_cast<char>(((n >> 12) & 0x3f) | 0x80));
|
||||
result.push_back(static_cast<char>(((n >> 6) & 0x3f) | 0x80));
|
||||
result.push_back(static_cast<char>((n & 0x3f) | 0x80));
|
||||
} else {
|
||||
throw Exception("Invalid unicode value: {}{}", n, string(startSeq, ++it));
|
||||
}
|
||||
}
|
||||
continue;
|
||||
default:
|
||||
throw Exception("Unexpected JSON parse state");
|
||||
}
|
||||
} else {
|
||||
result.push_back(ch);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Builds (does not throw) the Exception reported for an unexpected input
 * character. The character is shown as two lower-case hexadecimal digits.
 */
Exception JsonParser::unexpected(unsigned char c) {
    string message("Unexpected character in json ");
    message.push_back(toHex(c / 16));
    message.push_back(toHex(c % 16));
    return Exception(message);
}
|
||||
|
||||
/**
 * Matches the remainder of a literal (true / false / null) whose first
 * character has already been consumed.
 *
 * @param exp the expected remaining characters
 * @param n   number of characters in `exp`; must not exceed the size of the
 *            local 100-byte read buffer
 * @param tk  token to return on success
 *
 * The character following the literal, if any, is read and kept as the
 * look-ahead; a letter or digit there is rejected. Throws the Exception built
 * by unexpected() on any mismatch.
 */
JsonParser::Token JsonParser::tryLiteral(const char exp[], size_t n, Token tk) {
    char c[100];
    in_.readBytes(reinterpret_cast<uint8_t *>(c), n);
    for (size_t i = 0; i < n; ++i) {
        if (c[i] != exp[i]) {
            throw unexpected(c[i]);
        }
    }
    if (in_.hasMore()) {
        nextChar = in_.read();
        // The <cctype> classifiers are undefined for negative plain char values,
        // so classify the byte as unsigned char.
        const unsigned char following = static_cast<unsigned char>(nextChar);
        if (isdigit(following) || isalpha(following)) {
            throw unexpected(nextChar);
        }
        hasNext = true;
    }
    return tk;
}
|
||||
|
||||
} // namespace json
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,491 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_json_JsonIO_hh__
|
||||
#define avro_json_JsonIO_hh__
|
||||
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <boost/math/special_functions/fpclassify.hpp>
|
||||
#include <boost/utility.hpp>
|
||||
#include <locale>
|
||||
#include <sstream>
|
||||
#include <stack>
|
||||
#include <string>
|
||||
|
||||
#include "Config.hh"
|
||||
#include "Stream.hh"
|
||||
|
||||
namespace avro {
|
||||
namespace json {
|
||||
|
||||
/// Converts a value in [0, 15] to its lower-case hexadecimal digit.
inline char toHex(unsigned int n) {
    if (n < 10) {
        return static_cast<char>('0' + n);
    }
    return static_cast<char>('a' + (n - 10));
}
|
||||
|
||||
class AVRO_DECL JsonParser : boost::noncopyable {
|
||||
public:
|
||||
enum class Token {
|
||||
Null,
|
||||
Bool,
|
||||
Long,
|
||||
Double,
|
||||
String,
|
||||
ArrayStart,
|
||||
ArrayEnd,
|
||||
ObjectStart,
|
||||
ObjectEnd
|
||||
};
|
||||
|
||||
size_t line() const { return line_; }
|
||||
|
||||
private:
|
||||
enum State {
|
||||
stValue, // Expect a data type
|
||||
stArray0, // Expect a data type or ']'
|
||||
stArrayN, // Expect a ',' or ']'
|
||||
stObject0, // Expect a string or a '}'
|
||||
stObjectN, // Expect a ',' or '}'
|
||||
stKey // Expect a ':'
|
||||
};
|
||||
std::stack<State> stateStack;
|
||||
State curState;
|
||||
bool hasNext;
|
||||
char nextChar;
|
||||
bool peeked;
|
||||
|
||||
StreamReader in_;
|
||||
Token curToken;
|
||||
bool bv;
|
||||
int64_t lv;
|
||||
double dv;
|
||||
std::string sv;
|
||||
size_t line_;
|
||||
|
||||
Token doAdvance();
|
||||
Token tryLiteral(const char exp[], size_t n, Token tk);
|
||||
Token tryNumber(char ch);
|
||||
Token tryString();
|
||||
static Exception unexpected(unsigned char ch);
|
||||
char next();
|
||||
|
||||
static std::string decodeString(const std::string &s, bool binary);
|
||||
|
||||
public:
|
||||
JsonParser() : curState(stValue), hasNext(false), nextChar(0), peeked(false),
|
||||
curToken(Token::Null), bv(false), lv(0), dv(0), line_(1) {}
|
||||
|
||||
void init(InputStream &is) {
|
||||
// Clear by swapping with an empty stack
|
||||
std::stack<State>().swap(stateStack);
|
||||
curState = stValue;
|
||||
hasNext = false;
|
||||
peeked = false;
|
||||
line_ = 1;
|
||||
in_.reset(is);
|
||||
}
|
||||
|
||||
Token advance() {
|
||||
if (!peeked) {
|
||||
curToken = doAdvance();
|
||||
} else {
|
||||
peeked = false;
|
||||
}
|
||||
return curToken;
|
||||
}
|
||||
|
||||
Token peek() {
|
||||
if (!peeked) {
|
||||
curToken = doAdvance();
|
||||
peeked = true;
|
||||
}
|
||||
return curToken;
|
||||
}
|
||||
|
||||
void expectToken(Token tk);
|
||||
|
||||
bool boolValue() const {
|
||||
return bv;
|
||||
}
|
||||
|
||||
Token cur() const {
|
||||
return curToken;
|
||||
}
|
||||
|
||||
double doubleValue() const {
|
||||
return dv;
|
||||
}
|
||||
|
||||
int64_t longValue() const {
|
||||
return lv;
|
||||
}
|
||||
|
||||
const std::string &rawString() const {
|
||||
return sv;
|
||||
}
|
||||
|
||||
std::string stringValue() const {
|
||||
return decodeString(sv, false);
|
||||
}
|
||||
|
||||
std::string bytesValue() const {
|
||||
return decodeString(sv, true);
|
||||
}
|
||||
|
||||
void drain() {
|
||||
if (!stateStack.empty() || peeked) {
|
||||
throw Exception("Invalid state for draining");
|
||||
}
|
||||
in_.drain(hasNext);
|
||||
hasNext = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return UTF-8 encoded string value.
|
||||
*/
|
||||
static std::string toStringValue(const std::string &sv) {
|
||||
return decodeString(sv, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return byte-encoded string value. It is an error if the input
|
||||
* JSON string contained unicode characters more than "\u00ff'.
|
||||
*/
|
||||
static std::string toBytesValue(const std::string &sv) {
|
||||
return decodeString(sv, true);
|
||||
}
|
||||
|
||||
static const char *const tokenNames[];
|
||||
|
||||
static const char *toString(Token tk) {
|
||||
return tokenNames[static_cast<size_t>(tk)];
|
||||
}
|
||||
};
|
||||
|
||||
/// Formatter policy for JsonGenerator that emits no extra whitespace:
/// every hook is a no-op.
class AVRO_DECL JsonNullFormatter {
public:
    explicit JsonNullFormatter(StreamWriter &) {
    }

    void handleObjectStart() {
    }

    void handleObjectEnd() {
    }

    void handleValueEnd() {
    }

    void handleColon() {
    }
};
|
||||
|
||||
class AVRO_DECL JsonPrettyFormatter {
|
||||
StreamWriter &out_;
|
||||
size_t level_;
|
||||
std::vector<uint8_t> indent_;
|
||||
|
||||
static const int CHARS_PER_LEVEL = 2;
|
||||
|
||||
void printIndent() {
|
||||
size_t charsToIndent = level_ * CHARS_PER_LEVEL;
|
||||
if (indent_.size() < charsToIndent) {
|
||||
indent_.resize(charsToIndent * 2, ' ');
|
||||
}
|
||||
out_.writeBytes(indent_.data(), charsToIndent);
|
||||
}
|
||||
|
||||
public:
|
||||
explicit JsonPrettyFormatter(StreamWriter &out) : out_(out), level_(0), indent_(10, ' ') {}
|
||||
|
||||
void handleObjectStart() {
|
||||
out_.write('\n');
|
||||
++level_;
|
||||
printIndent();
|
||||
}
|
||||
|
||||
void handleObjectEnd() {
|
||||
out_.write('\n');
|
||||
--level_;
|
||||
printIndent();
|
||||
}
|
||||
|
||||
void handleValueEnd() {
|
||||
out_.write('\n');
|
||||
printIndent();
|
||||
}
|
||||
|
||||
void handleColon() {
|
||||
out_.write(' ');
|
||||
}
|
||||
};
|
||||
|
||||
template<class F>
|
||||
class AVRO_DECL JsonGenerator {
|
||||
StreamWriter out_;
|
||||
F formatter_;
|
||||
enum State {
|
||||
stStart,
|
||||
stArray0,
|
||||
stArrayN,
|
||||
stMap0,
|
||||
stMapN,
|
||||
stKey,
|
||||
};
|
||||
|
||||
std::stack<State> stateStack;
|
||||
State top;
|
||||
|
||||
void write(const char *b, const char *p) {
|
||||
if (b != p) {
|
||||
out_.writeBytes(reinterpret_cast<const uint8_t *>(b), p - b);
|
||||
}
|
||||
}
|
||||
|
||||
void escape(char c, const char *b, const char *p) {
|
||||
write(b, p);
|
||||
out_.write('\\');
|
||||
out_.write(c);
|
||||
}
|
||||
|
||||
void escapeCtl(char c) {
|
||||
escapeUnicode(static_cast<uint8_t>(c));
|
||||
}
|
||||
|
||||
void writeHex(char c) {
|
||||
out_.write(toHex((static_cast<unsigned char>(c)) / 16));
|
||||
out_.write(toHex((static_cast<unsigned char>(c)) % 16));
|
||||
}
|
||||
|
||||
void escapeUnicode16(uint32_t c) {
|
||||
out_.write('\\');
|
||||
out_.write('u');
|
||||
writeHex(static_cast<char>((c >> 8) & 0xff));
|
||||
writeHex(static_cast<char>(c & 0xff));
|
||||
}
|
||||
void escapeUnicode(uint32_t c) {
|
||||
if (c < 0x10000) {
|
||||
escapeUnicode16(c);
|
||||
} else if (c < 0x110000) {
|
||||
c -= 0x10000;
|
||||
escapeUnicode16(((c >> 10) & 0x3ff) | 0xd800);
|
||||
escapeUnicode16((c & 0x3ff) | 0xdc00);
|
||||
} else {
|
||||
throw Exception("Invalid code-point: {}", c);
|
||||
}
|
||||
}
|
||||
void doEncodeString(const char *b, size_t len, bool binary) {
|
||||
const char *e = b + len;
|
||||
out_.write('"');
|
||||
for (const char *p = b; p != e; p++) {
|
||||
if ((*p & 0x80) != 0) {
|
||||
write(b, p);
|
||||
if (binary) {
|
||||
escapeCtl(*p);
|
||||
} else if ((*p & 0x40) == 0) {
|
||||
throw Exception("Invalid UTF-8 sequence");
|
||||
} else {
|
||||
int more = 1;
|
||||
uint32_t value;
|
||||
if ((*p & 0x20) != 0) {
|
||||
more++;
|
||||
if ((*p & 0x10) != 0) {
|
||||
more++;
|
||||
if ((*p & 0x08) != 0) {
|
||||
throw Exception("Invalid UTF-8 sequence");
|
||||
} else {
|
||||
value = *p & 0x07;
|
||||
}
|
||||
} else {
|
||||
value = *p & 0x0f;
|
||||
}
|
||||
} else {
|
||||
value = *p & 0x1f;
|
||||
}
|
||||
for (int i = 0; i < more; ++i) {
|
||||
if (++p == e || (*p & 0xc0) != 0x80) {
|
||||
throw Exception("Invalid UTF-8 sequence");
|
||||
}
|
||||
value <<= 6;
|
||||
value |= *p & 0x3f;
|
||||
}
|
||||
escapeUnicode(value);
|
||||
}
|
||||
} else {
|
||||
switch (*p) {
|
||||
case '\\':
|
||||
case '"':
|
||||
escape(*p, b, p);
|
||||
break;
|
||||
case '\b':
|
||||
escape('b', b, p);
|
||||
break;
|
||||
case '\f':
|
||||
escape('f', b, p);
|
||||
break;
|
||||
case '\n':
|
||||
escape('n', b, p);
|
||||
break;
|
||||
case '\r':
|
||||
escape('r', b, p);
|
||||
break;
|
||||
case '\t':
|
||||
escape('t', b, p);
|
||||
break;
|
||||
default:
|
||||
if (std::iscntrl(*p, std::locale::classic())) {
|
||||
write(b, p);
|
||||
escapeCtl(*p);
|
||||
break;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
b = p + 1;
|
||||
}
|
||||
write(b, e);
|
||||
out_.write('"');
|
||||
}
|
||||
|
||||
void sep() {
|
||||
if (top == stArrayN) {
|
||||
out_.write(',');
|
||||
formatter_.handleValueEnd();
|
||||
} else if (top == stArray0) {
|
||||
top = stArrayN;
|
||||
}
|
||||
}
|
||||
|
||||
void sep2() {
|
||||
if (top == stKey) {
|
||||
top = stMapN;
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
JsonGenerator() : formatter_(out_), top(stStart) {}
|
||||
|
||||
void init(OutputStream &os) {
|
||||
out_.reset(os);
|
||||
}
|
||||
|
||||
void flush() {
|
||||
out_.flush();
|
||||
}
|
||||
|
||||
int64_t byteCount() const {
|
||||
return out_.byteCount();
|
||||
}
|
||||
|
||||
void encodeNull() {
|
||||
sep();
|
||||
out_.writeBytes(reinterpret_cast<const uint8_t *>("null"), 4);
|
||||
sep2();
|
||||
}
|
||||
|
||||
void encodeBool(bool b) {
|
||||
sep();
|
||||
if (b) {
|
||||
out_.writeBytes(reinterpret_cast<const uint8_t *>("true"), 4);
|
||||
} else {
|
||||
out_.writeBytes(reinterpret_cast<const uint8_t *>("false"), 5);
|
||||
}
|
||||
sep2();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void encodeNumber(T t) {
|
||||
sep();
|
||||
std::ostringstream oss;
|
||||
oss << boost::lexical_cast<std::string>(t);
|
||||
const std::string s = oss.str();
|
||||
out_.writeBytes(reinterpret_cast<const uint8_t *>(s.data()), s.size());
|
||||
sep2();
|
||||
}
|
||||
|
||||
void encodeNumber(double t) {
|
||||
sep();
|
||||
std::ostringstream oss;
|
||||
if (boost::math::isfinite(t)) {
|
||||
oss << boost::lexical_cast<std::string>(t);
|
||||
} else if (boost::math::isnan(t)) {
|
||||
oss << "NaN";
|
||||
} else if (t == std::numeric_limits<double>::infinity()) {
|
||||
oss << "Infinity";
|
||||
} else {
|
||||
oss << "-Infinity";
|
||||
}
|
||||
const std::string s = oss.str();
|
||||
out_.writeBytes(reinterpret_cast<const uint8_t *>(s.data()), s.size());
|
||||
sep2();
|
||||
}
|
||||
|
||||
void encodeString(const std::string &s) {
|
||||
if (top == stMap0) {
|
||||
top = stKey;
|
||||
} else if (top == stMapN) {
|
||||
out_.write(',');
|
||||
formatter_.handleValueEnd();
|
||||
top = stKey;
|
||||
} else if (top == stKey) {
|
||||
top = stMapN;
|
||||
} else {
|
||||
sep();
|
||||
}
|
||||
doEncodeString(s.c_str(), s.size(), false);
|
||||
if (top == stKey) {
|
||||
out_.write(':');
|
||||
formatter_.handleColon();
|
||||
}
|
||||
}
|
||||
|
||||
void encodeBinary(const uint8_t *bytes, size_t len) {
|
||||
sep();
|
||||
doEncodeString(reinterpret_cast<const char *>(bytes), len, true);
|
||||
sep2();
|
||||
}
|
||||
|
||||
void arrayStart() {
|
||||
sep();
|
||||
stateStack.push(top);
|
||||
top = stArray0;
|
||||
out_.write('[');
|
||||
formatter_.handleObjectStart();
|
||||
}
|
||||
|
||||
void arrayEnd() {
|
||||
top = stateStack.top();
|
||||
stateStack.pop();
|
||||
formatter_.handleObjectEnd();
|
||||
out_.write(']');
|
||||
sep2();
|
||||
}
|
||||
|
||||
void objectStart() {
|
||||
sep();
|
||||
stateStack.push(top);
|
||||
top = stMap0;
|
||||
out_.write('{');
|
||||
formatter_.handleObjectStart();
|
||||
}
|
||||
|
||||
void objectEnd() {
|
||||
top = stateStack.top();
|
||||
stateStack.pop();
|
||||
formatter_.handleObjectEnd();
|
||||
out_.write('}');
|
||||
sep2();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace json
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,660 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <boost/math/special_functions/fpclassify.hpp>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "Decoder.hh"
|
||||
#include "Encoder.hh"
|
||||
#include "Symbol.hh"
|
||||
#include "ValidSchema.hh"
|
||||
#include "ValidatingCodec.hh"
|
||||
|
||||
#include "../json/JsonIO.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
namespace parsing {
|
||||
|
||||
using std::make_shared;
|
||||
|
||||
using std::istringstream;
|
||||
using std::map;
|
||||
using std::ostringstream;
|
||||
using std::reverse;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
using avro::json::JsonGenerator;
|
||||
using avro::json::JsonNullFormatter;
|
||||
using avro::json::JsonParser;
|
||||
|
||||
class JsonGrammarGenerator : public ValidatingGrammarGenerator {
|
||||
ProductionPtr doGenerate(const NodePtr &n,
|
||||
std::map<NodePtr, ProductionPtr> &m) final;
|
||||
};
|
||||
|
||||
/// Returns the node's own name when it has one; otherwise the text produced
/// by streaming its type.
static std::string nameOf(const NodePtr &n) {
    if (!n->hasName()) {
        std::ostringstream typeText;
        typeText << n->type();
        return typeText.str();
    }
    return std::string(n->name());
}
|
||||
|
||||
/**
 * Builds the production for schema node `n`.
 *
 * Records, enums and unions get JSON-specific productions; all other node
 * types are delegated to ValidatingGrammarGenerator. `m` maps nodes to the
 * productions already generated for them and is updated for records and
 * enums.
 */
ProductionPtr JsonGrammarGenerator::doGenerate(const NodePtr &n,
                                               std::map<NodePtr, ProductionPtr> &m) {
    switch (n->type()) {
        case AVRO_NULL:
        case AVRO_BOOL:
        case AVRO_INT:
        case AVRO_LONG:
        case AVRO_FLOAT:
        case AVRO_DOUBLE:
        case AVRO_STRING:
        case AVRO_BYTES:
        case AVRO_FIXED:
        case AVRO_ARRAY:
        case AVRO_MAP:
        case AVRO_SYMBOLIC:
            return ValidatingGrammarGenerator::doGenerate(n, m);

        case AVRO_RECORD: {
            ProductionPtr production = make_shared<Production>();

            m.erase(n);

            const size_t fieldCount = n->leaves();
            production->reserve(2 + 2 * fieldCount);
            // The symbols are appended in forward order (each field's own
            // production reversed) and the whole sequence is reversed at the end.
            production->push_back(Symbol::recordStartSymbol());
            for (size_t i = 0; i < fieldCount; ++i) {
                const NodePtr &fieldNode = n->leafAt(i);
                ProductionPtr fieldProduction = doGenerate(fieldNode, m);
                production->push_back(Symbol::fieldSymbol(n->nameAt(i)));
                copy(fieldProduction->rbegin(), fieldProduction->rend(),
                     back_inserter(*production));
            }
            production->push_back(Symbol::recordEndSymbol());
            reverse(production->begin(), production->end());

            m[n] = production;
            return make_shared<Production>(1, Symbol::indirect(production));
        }

        case AVRO_ENUM: {
            const size_t symbolCount = n->names();
            vector<string> symbolNames;
            symbolNames.reserve(symbolCount);
            for (size_t i = 0; i < symbolCount; ++i) {
                symbolNames.push_back(n->nameAt(i));
            }

            ProductionPtr production = make_shared<Production>();
            production->push_back(Symbol::nameListSymbol(symbolNames));
            production->push_back(Symbol::enumSymbol());
            m[n] = production;
            return production;
        }

        case AVRO_UNION: {
            const size_t branchCount = n->leaves();

            vector<ProductionPtr> branches;
            branches.reserve(branchCount);

            vector<string> branchNames;
            branchNames.reserve(branchCount);

            for (size_t i = 0; i < branchCount; ++i) {
                const NodePtr &branchNode = n->leafAt(i);
                ProductionPtr branch = doGenerate(branchNode, m);
                if (branchNode->type() != AVRO_NULL) {
                    // Every non-null branch is prefixed with a record-end symbol.
                    ProductionPtr wrapped = make_shared<Production>();
                    wrapped->push_back(Symbol::recordEndSymbol());
                    copy(branch->begin(), branch->end(), back_inserter(*wrapped));
                    branch.swap(wrapped);
                }
                branches.push_back(branch);
                branchNames.push_back(nameOf(branchNode));
            }

            ProductionPtr production = make_shared<Production>();
            production->push_back(Symbol::alternative(branches));
            production->push_back(Symbol::nameListSymbol(branchNames));
            production->push_back(Symbol::unionSymbol());
            return production;
        }

        default:
            throw Exception("Unknown node type");
    }
}
|
||||
|
||||
/// Free-function shorthand for JsonParser::expectToken: advances `in` and
/// requires the next token to be `tk`.
static void expectToken(JsonParser &in, JsonParser::Token tk) {
    in.expectToken(tk);
}
|
||||
|
||||
class JsonDecoderHandler {
|
||||
JsonParser &in_;
|
||||
|
||||
public:
|
||||
explicit JsonDecoderHandler(JsonParser &p) : in_(p) {}
|
||||
size_t handle(const Symbol &s) {
|
||||
switch (s.kind()) {
|
||||
case Symbol::Kind::RecordStart:
|
||||
expectToken(in_, JsonParser::Token::ObjectStart);
|
||||
break;
|
||||
case Symbol::Kind::RecordEnd:
|
||||
expectToken(in_, JsonParser::Token::ObjectEnd);
|
||||
break;
|
||||
case Symbol::Kind::Field:
|
||||
expectToken(in_, JsonParser::Token::String);
|
||||
if (s.extra<string>() != in_.stringValue()) {
|
||||
throw Exception(R"(Incorrect field: expected "{}" but got "{}".)", s.extra<string>(), in_.stringValue());
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename P>
|
||||
class JsonDecoder : public Decoder {
|
||||
JsonParser in_;
|
||||
JsonDecoderHandler handler_;
|
||||
P parser_;
|
||||
|
||||
void init(InputStream &is) final;
|
||||
void decodeNull() final;
|
||||
bool decodeBool() final;
|
||||
int32_t decodeInt() final;
|
||||
int64_t decodeLong() final;
|
||||
float decodeFloat() final;
|
||||
double decodeDouble() final;
|
||||
void decodeString(string &value) final;
|
||||
void skipString() final;
|
||||
void decodeBytes(vector<uint8_t> &value) final;
|
||||
void skipBytes() final;
|
||||
void decodeFixed(size_t n, vector<uint8_t> &value) final;
|
||||
void skipFixed(size_t n) final;
|
||||
size_t decodeEnum() final;
|
||||
size_t arrayStart() final;
|
||||
size_t arrayNext() final;
|
||||
size_t skipArray() final;
|
||||
size_t mapStart() final;
|
||||
size_t mapNext() final;
|
||||
size_t skipMap() final;
|
||||
size_t decodeUnionIndex() final;
|
||||
|
||||
void expect(JsonParser::Token tk);
|
||||
void skipComposite();
|
||||
void drain() final;
|
||||
|
||||
public:
|
||||
explicit JsonDecoder(const ValidSchema &s) : handler_(in_),
|
||||
parser_(JsonGrammarGenerator().generate(s), NULL, handler_) {}
|
||||
};
|
||||
|
||||
template<typename P>
|
||||
void JsonDecoder<P>::init(InputStream &is) {
|
||||
in_.init(is);
|
||||
parser_.reset();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void JsonDecoder<P>::expect(JsonParser::Token tk) {
|
||||
expectToken(in_, tk);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void JsonDecoder<P>::decodeNull() {
|
||||
parser_.advance(Symbol::Kind::Null);
|
||||
expect(JsonParser::Token::Null);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
bool JsonDecoder<P>::decodeBool() {
|
||||
parser_.advance(Symbol::Kind::Bool);
|
||||
expect(JsonParser::Token::Bool);
|
||||
bool result = in_.boolValue();
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
int32_t JsonDecoder<P>::decodeInt() {
|
||||
parser_.advance(Symbol::Kind::Int);
|
||||
expect(JsonParser::Token::Long);
|
||||
int64_t result = in_.longValue();
|
||||
if (result < INT32_MIN || result > INT32_MAX) {
|
||||
throw Exception("Value out of range for Avro int: {}", result);
|
||||
}
|
||||
return static_cast<int32_t>(result);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
int64_t JsonDecoder<P>::decodeLong() {
|
||||
parser_.advance(Symbol::Kind::Long);
|
||||
expect(JsonParser::Token::Long);
|
||||
int64_t result = in_.longValue();
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
float JsonDecoder<P>::decodeFloat() {
|
||||
parser_.advance(Symbol::Kind::Float);
|
||||
expect(JsonParser::Token::Double);
|
||||
double result = in_.doubleValue();
|
||||
return static_cast<float>(result);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
double JsonDecoder<P>::decodeDouble() {
|
||||
parser_.advance(Symbol::Kind::Double);
|
||||
expect(JsonParser::Token::Double);
|
||||
double result = in_.doubleValue();
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void JsonDecoder<P>::decodeString(string &value) {
|
||||
parser_.advance(Symbol::Kind::String);
|
||||
expect(JsonParser::Token::String);
|
||||
value = in_.stringValue();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void JsonDecoder<P>::skipString() {
|
||||
parser_.advance(Symbol::Kind::String);
|
||||
expect(JsonParser::Token::String);
|
||||
}
|
||||
|
||||
/** Copies the characters of s, one per element, into a byte vector. */
static vector<uint8_t> toBytes(const string &s) {
    vector<uint8_t> bytes;
    bytes.assign(s.begin(), s.end());
    return bytes;
}
|
||||
|
||||
template<typename P>
|
||||
void JsonDecoder<P>::decodeBytes(vector<uint8_t> &value) {
|
||||
parser_.advance(Symbol::Kind::Bytes);
|
||||
expect(JsonParser::Token::String);
|
||||
value = toBytes(in_.bytesValue());
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void JsonDecoder<P>::skipBytes() {
|
||||
parser_.advance(Symbol::Kind::Bytes);
|
||||
expect(JsonParser::Token::String);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void JsonDecoder<P>::decodeFixed(size_t n, vector<uint8_t> &value) {
|
||||
parser_.advance(Symbol::Kind::Fixed);
|
||||
parser_.assertSize(n);
|
||||
expect(JsonParser::Token::String);
|
||||
value = toBytes(in_.bytesValue());
|
||||
if (value.size() != n) {
|
||||
throw Exception("Incorrect value for fixed");
|
||||
}
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void JsonDecoder<P>::skipFixed(size_t n) {
|
||||
parser_.advance(Symbol::Kind::Fixed);
|
||||
parser_.assertSize(n);
|
||||
expect(JsonParser::Token::String);
|
||||
vector<uint8_t> result = toBytes(in_.bytesValue());
|
||||
if (result.size() != n) {
|
||||
throw Exception("Incorrect value for fixed");
|
||||
}
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t JsonDecoder<P>::decodeEnum() {
|
||||
parser_.advance(Symbol::Kind::Enum);
|
||||
expect(JsonParser::Token::String);
|
||||
size_t result = parser_.indexForName(in_.stringValue());
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t JsonDecoder<P>::arrayStart() {
|
||||
parser_.advance(Symbol::Kind::ArrayStart);
|
||||
parser_.pushRepeatCount(0);
|
||||
expect(JsonParser::Token::ArrayStart);
|
||||
return arrayNext();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t JsonDecoder<P>::arrayNext() {
|
||||
parser_.processImplicitActions();
|
||||
if (in_.peek() == JsonParser::Token::ArrayEnd) {
|
||||
in_.advance();
|
||||
parser_.popRepeater();
|
||||
parser_.advance(Symbol::Kind::ArrayEnd);
|
||||
return 0;
|
||||
}
|
||||
parser_.nextRepeatCount(1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void JsonDecoder<P>::skipComposite() {
|
||||
size_t level = 0;
|
||||
for (;;) {
|
||||
switch (in_.advance()) {
|
||||
case JsonParser::Token::ArrayStart:
|
||||
case JsonParser::Token::ObjectStart:
|
||||
++level;
|
||||
continue;
|
||||
case JsonParser::Token::ArrayEnd:
|
||||
case JsonParser::Token::ObjectEnd:
|
||||
if (level == 0) {
|
||||
return;
|
||||
}
|
||||
--level;
|
||||
continue;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void JsonDecoder<P>::drain() {
|
||||
parser_.processImplicitActions();
|
||||
in_.drain();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t JsonDecoder<P>::skipArray() {
|
||||
parser_.advance(Symbol::Kind::ArrayStart);
|
||||
parser_.pop();
|
||||
parser_.advance(Symbol::Kind::ArrayEnd);
|
||||
expect(JsonParser::Token::ArrayStart);
|
||||
skipComposite();
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t JsonDecoder<P>::mapStart() {
|
||||
parser_.advance(Symbol::Kind::MapStart);
|
||||
parser_.pushRepeatCount(0);
|
||||
expect(JsonParser::Token::ObjectStart);
|
||||
return mapNext();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t JsonDecoder<P>::mapNext() {
|
||||
parser_.processImplicitActions();
|
||||
if (in_.peek() == JsonParser::Token::ObjectEnd) {
|
||||
in_.advance();
|
||||
parser_.popRepeater();
|
||||
parser_.advance(Symbol::Kind::MapEnd);
|
||||
return 0;
|
||||
}
|
||||
parser_.nextRepeatCount(1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t JsonDecoder<P>::skipMap() {
|
||||
parser_.advance(Symbol::Kind::MapStart);
|
||||
parser_.pop();
|
||||
parser_.advance(Symbol::Kind::MapEnd);
|
||||
expect(JsonParser::Token::ObjectStart);
|
||||
skipComposite();
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t JsonDecoder<P>::decodeUnionIndex() {
|
||||
parser_.advance(Symbol::Kind::Union);
|
||||
|
||||
size_t result;
|
||||
if (in_.peek() == JsonParser::Token::Null) {
|
||||
result = parser_.indexForName("null");
|
||||
} else {
|
||||
expect(JsonParser::Token::ObjectStart);
|
||||
expect(JsonParser::Token::String);
|
||||
result = parser_.indexForName(in_.stringValue());
|
||||
}
|
||||
parser_.selectBranch(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename F = JsonNullFormatter>
|
||||
class JsonHandler {
|
||||
JsonGenerator<F> &generator_;
|
||||
|
||||
public:
|
||||
explicit JsonHandler(JsonGenerator<F> &g) : generator_(g) {}
|
||||
size_t handle(const Symbol &s) {
|
||||
switch (s.kind()) {
|
||||
case Symbol::Kind::RecordStart:
|
||||
generator_.objectStart();
|
||||
break;
|
||||
case Symbol::Kind::RecordEnd:
|
||||
generator_.objectEnd();
|
||||
break;
|
||||
case Symbol::Kind::Field:
|
||||
generator_.encodeString(s.extra<string>());
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename P, typename F = JsonNullFormatter>
|
||||
class JsonEncoder : public Encoder {
|
||||
JsonGenerator<F> out_;
|
||||
JsonHandler<F> handler_;
|
||||
P parser_;
|
||||
|
||||
void init(OutputStream &os) final;
|
||||
void flush() final;
|
||||
int64_t byteCount() const final;
|
||||
void encodeNull() final;
|
||||
void encodeBool(bool b) final;
|
||||
void encodeInt(int32_t i) final;
|
||||
void encodeLong(int64_t l) final;
|
||||
void encodeFloat(float f) final;
|
||||
void encodeDouble(double d) final;
|
||||
void encodeString(const std::string &s) final;
|
||||
void encodeBytes(const uint8_t *bytes, size_t len) final;
|
||||
void encodeFixed(const uint8_t *bytes, size_t len) final;
|
||||
void encodeEnum(size_t e) final;
|
||||
void arrayStart() final;
|
||||
void arrayEnd() final;
|
||||
void mapStart() final;
|
||||
void mapEnd() final;
|
||||
void setItemCount(size_t count) final;
|
||||
void startItem() final;
|
||||
void encodeUnionIndex(size_t e) final;
|
||||
|
||||
public:
|
||||
explicit JsonEncoder(const ValidSchema &schema) : handler_(out_),
|
||||
parser_(JsonGrammarGenerator().generate(schema), NULL, handler_) {}
|
||||
};
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::init(OutputStream &os) {
|
||||
out_.init(os);
|
||||
parser_.reset();
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::flush() {
|
||||
parser_.processImplicitActions();
|
||||
out_.flush();
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
int64_t JsonEncoder<P, F>::byteCount() const {
|
||||
return out_.byteCount();
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::encodeNull() {
|
||||
parser_.advance(Symbol::Kind::Null);
|
||||
out_.encodeNull();
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::encodeBool(bool b) {
|
||||
parser_.advance(Symbol::Kind::Bool);
|
||||
out_.encodeBool(b);
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::encodeInt(int32_t i) {
|
||||
parser_.advance(Symbol::Kind::Int);
|
||||
out_.encodeNumber(i);
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::encodeLong(int64_t l) {
|
||||
parser_.advance(Symbol::Kind::Long);
|
||||
out_.encodeNumber(l);
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::encodeFloat(float f) {
|
||||
parser_.advance(Symbol::Kind::Float);
|
||||
if (f == std::numeric_limits<float>::infinity()) {
|
||||
out_.encodeString("Infinity");
|
||||
} else if (-f == std::numeric_limits<float>::infinity()) {
|
||||
out_.encodeString("-Infinity");
|
||||
} else if (boost::math::isnan(f)) {
|
||||
out_.encodeString("NaN");
|
||||
} else {
|
||||
out_.encodeNumber(f);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::encodeDouble(double d) {
|
||||
parser_.advance(Symbol::Kind::Double);
|
||||
if (d == std::numeric_limits<double>::infinity()) {
|
||||
out_.encodeString("Infinity");
|
||||
} else if (-d == std::numeric_limits<double>::infinity()) {
|
||||
out_.encodeString("-Infinity");
|
||||
} else if (boost::math::isnan(d)) {
|
||||
out_.encodeString("NaN");
|
||||
} else {
|
||||
out_.encodeNumber(d);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::encodeString(const std::string &s) {
|
||||
parser_.advance(Symbol::Kind::String);
|
||||
out_.encodeString(s);
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::encodeBytes(const uint8_t *bytes, size_t len) {
|
||||
parser_.advance(Symbol::Kind::Bytes);
|
||||
out_.encodeBinary(bytes, len);
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::encodeFixed(const uint8_t *bytes, size_t len) {
|
||||
parser_.advance(Symbol::Kind::Fixed);
|
||||
parser_.assertSize(len);
|
||||
out_.encodeBinary(bytes, len);
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::encodeEnum(size_t e) {
|
||||
parser_.advance(Symbol::Kind::Enum);
|
||||
const string &s = parser_.nameForIndex(e);
|
||||
out_.encodeString(s);
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::arrayStart() {
|
||||
parser_.advance(Symbol::Kind::ArrayStart);
|
||||
parser_.pushRepeatCount(0);
|
||||
out_.arrayStart();
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::arrayEnd() {
|
||||
parser_.popRepeater();
|
||||
parser_.advance(Symbol::Kind::ArrayEnd);
|
||||
out_.arrayEnd();
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::mapStart() {
|
||||
parser_.advance(Symbol::Kind::MapStart);
|
||||
parser_.pushRepeatCount(0);
|
||||
out_.objectStart();
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::mapEnd() {
|
||||
parser_.popRepeater();
|
||||
parser_.advance(Symbol::Kind::MapEnd);
|
||||
out_.objectEnd();
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::setItemCount(size_t count) {
|
||||
parser_.nextRepeatCount(count);
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::startItem() {
|
||||
parser_.processImplicitActions();
|
||||
if (parser_.top() != Symbol::Kind::Repeater) {
|
||||
throw Exception("startItem at not an item boundary");
|
||||
}
|
||||
}
|
||||
|
||||
template<typename P, typename F>
|
||||
void JsonEncoder<P, F>::encodeUnionIndex(size_t e) {
|
||||
parser_.advance(Symbol::Kind::Union);
|
||||
|
||||
const std::string name = parser_.nameForIndex(e);
|
||||
|
||||
if (name != "null") {
|
||||
out_.objectStart();
|
||||
out_.encodeString(name);
|
||||
}
|
||||
parser_.selectBranch(e);
|
||||
}
|
||||
|
||||
} // namespace parsing
|
||||
|
||||
/** Creates a JSON decoder for schema s, driven by a SimpleParser. */
DecoderPtr jsonDecoder(const ValidSchema &s) {
    using Parser = parsing::SimpleParser<parsing::JsonDecoderHandler>;
    using Impl = parsing::JsonDecoder<Parser>;
    return std::make_shared<Impl>(s);
}
|
||||
|
||||
/** Creates a JSON encoder for schema that uses JsonNullFormatter. */
EncoderPtr jsonEncoder(const ValidSchema &schema) {
    using Formatter = avro::json::JsonNullFormatter;
    using Parser = parsing::SimpleParser<parsing::JsonHandler<Formatter>>;
    using Impl = parsing::JsonEncoder<Parser, Formatter>;
    return std::make_shared<Impl>(schema);
}
|
||||
|
||||
/** Creates a JSON encoder for schema that uses JsonPrettyFormatter. */
EncoderPtr jsonPrettyEncoder(const ValidSchema &schema) {
    using Formatter = avro::json::JsonPrettyFormatter;
    using Parser = parsing::SimpleParser<parsing::JsonHandler<Formatter>>;
    using Impl = parsing::JsonEncoder<Parser, Formatter>;
    return std::make_shared<Impl>(schema);
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,672 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "Decoder.hh"
|
||||
#include "Encoder.hh"
|
||||
#include "Generic.hh"
|
||||
#include "NodeImpl.hh"
|
||||
#include "Stream.hh"
|
||||
#include "Symbol.hh"
|
||||
#include "Types.hh"
|
||||
#include "ValidSchema.hh"
|
||||
#include "ValidatingCodec.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
using std::make_shared;
|
||||
|
||||
namespace parsing {
|
||||
|
||||
using std::shared_ptr;
|
||||
using std::static_pointer_cast;
|
||||
|
||||
using std::make_pair;
|
||||
using std::map;
|
||||
using std::pair;
|
||||
using std::reverse;
|
||||
using std::set;
|
||||
using std::stack;
|
||||
using std::string;
|
||||
using std::unique_ptr;
|
||||
using std::vector;
|
||||
|
||||
typedef pair<NodePtr, NodePtr> NodePair;
|
||||
|
||||
class ResolvingGrammarGenerator : public ValidatingGrammarGenerator {
|
||||
ProductionPtr doGenerate2(const NodePtr &writer,
|
||||
const NodePtr &reader, map<NodePair, ProductionPtr> &m,
|
||||
map<NodePtr, ProductionPtr> &m2);
|
||||
ProductionPtr resolveRecords(const NodePtr &writer,
|
||||
const NodePtr &reader, map<NodePair, ProductionPtr> &m,
|
||||
map<NodePtr, ProductionPtr> &m2);
|
||||
ProductionPtr resolveUnion(const NodePtr &writer,
|
||||
const NodePtr &reader, map<NodePair, ProductionPtr> &m,
|
||||
map<NodePtr, ProductionPtr> &m2);
|
||||
|
||||
static std::optional<size_t> bestBranch(const NodePtr &writer, const NodePtr &reader);
|
||||
|
||||
ProductionPtr getWriterProduction(const NodePtr &n,
|
||||
map<NodePtr, ProductionPtr> &m2);
|
||||
|
||||
public:
|
||||
Symbol generate(
|
||||
const ValidSchema &writer, const ValidSchema &reader);
|
||||
};
|
||||
|
||||
/**
 * Builds the root symbol of the resolving grammar.
 *
 * Two productions are generated and fixed up: a writer-only one from the
 * validating generator, and the resolving one from doGenerate2(). Both are
 * combined with Symbol::rootSymbol().
 */
Symbol ResolvingGrammarGenerator::generate(
    const ValidSchema &writer, const ValidSchema &reader) {
    const NodePtr &readerRoot = reader.root();
    const NodePtr &writerRoot = writer.root();

    // Writer-only grammar.
    map<NodePtr, ProductionPtr> writerProductions;
    ProductionPtr writerOnly =
        ValidatingGrammarGenerator::doGenerate(writerRoot, writerProductions);
    fixup(writerOnly, writerProductions);

    // Resolving grammar, which may reuse the writer-only productions.
    map<NodePair, ProductionPtr> resolvedPairs;
    ProductionPtr resolving =
        doGenerate2(writerRoot, readerRoot, resolvedPairs, writerProductions);
    fixup(resolving, resolvedPairs);

    return Symbol::rootSymbol(resolving, writerOnly);
}
|
||||
|
||||
std::optional<size_t> ResolvingGrammarGenerator::bestBranch(const NodePtr &writer,
|
||||
const NodePtr &reader) {
|
||||
Type t = writer->type();
|
||||
|
||||
const size_t c = reader->leaves();
|
||||
for (size_t j = 0; j < c; ++j) {
|
||||
NodePtr r = reader->leafAt(j);
|
||||
if (r->type() == AVRO_SYMBOLIC) {
|
||||
r = resolveSymbol(r);
|
||||
}
|
||||
if (t == r->type()) {
|
||||
if (r->hasName()) {
|
||||
if (r->name() == writer->name()) {
|
||||
return j;
|
||||
}
|
||||
} else {
|
||||
return j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t j = 0; j < c; ++j) {
|
||||
const NodePtr &r = reader->leafAt(j);
|
||||
Type rt = r->type();
|
||||
switch (t) {
|
||||
case AVRO_INT:
|
||||
if (rt == AVRO_LONG || rt == AVRO_DOUBLE || rt == AVRO_FLOAT) {
|
||||
return j;
|
||||
}
|
||||
break;
|
||||
case AVRO_LONG:
|
||||
case AVRO_FLOAT:
|
||||
if (rt == AVRO_DOUBLE) {
|
||||
return j;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/**
 * Encodes defaultValue with a binary encoder into a memory output stream and
 * returns a snapshot of the bytes written.
 */
static shared_ptr<vector<uint8_t>> getAvroBinary(
    const GenericDatum &defaultValue) {
    EncoderPtr encoder = binaryEncoder();
    unique_ptr<OutputStream> sink = memoryOutputStream();

    encoder->init(*sink);
    GenericWriter::write(*encoder, defaultValue);
    // Flush before taking the snapshot.
    encoder->flush();

    return snapshot(*sink);
}
|
||||
|
||||
/**
 * Returns the writer-only production for node n.
 *
 * A symbolic node is replaced by the node it refers to. A production already
 * cached in m2 is returned as is; otherwise one is generated with the
 * validating generator and fixed up against m2.
 */
ProductionPtr ResolvingGrammarGenerator::getWriterProduction(
    const NodePtr &n, map<NodePtr, ProductionPtr> &m2) {
    const NodePtr &target = (n->type() == AVRO_SYMBOLIC)
        ? static_cast<const NodeSymbolic &>(*n).getNode()
        : n;

    const auto cached = m2.find(target);
    if (cached != m2.end()) {
        return cached->second;
    }

    ProductionPtr generated = ValidatingGrammarGenerator::doGenerate(target, m2);
    fixup(generated, m2);
    return generated;
}
|
||||
|
||||
/**
 * Builds the production that resolves a writer record against a reader
 * record.
 *
 * Symbols are appended in reverse and the whole production is reversed at
 * the end, before the size-list (reader field order) and record actions are
 * appended.
 */
ProductionPtr ResolvingGrammarGenerator::resolveRecords(
    const NodePtr &writer, const NodePtr &reader,
    map<NodePair, ProductionPtr> &m,
    map<NodePtr, ProductionPtr> &m2) {
    ProductionPtr result = make_shared<Production>();

    // Names of the writer's fields, in writer order.
    const size_t writerFieldCount = writer->names();
    vector<string> writerFields;
    writerFields.reserve(writerFieldCount);
    for (size_t i = 0; i < writerFieldCount; ++i) {
        writerFields.push_back(writer->nameAt(i));
    }

    // Reader field indices not yet matched to a writer field.
    set<size_t> unmatchedReaderFields;
    const size_t readerFieldCount = reader->names();
    for (size_t i = 0; i < readerFieldCount; ++i) {
        unmatchedReaderFields.emplace(i);
    }

    vector<size_t> fieldOrder;
    fieldOrder.reserve(unmatchedReaderFields.size());

    /*
     * Walk the writer fields. A field that the reader also has is resolved
     * recursively and removed from the unmatched set. A field the reader
     * lacks is arranged to be skipped using its writer-only production.
     */
    for (size_t wi = 0; wi != writerFields.size(); ++wi) {
        size_t ri;
        if (!reader->nameIndex(writerFields[wi], ri)) {
            ProductionPtr skipped = getWriterProduction(writer->leafAt(wi), m2);
            result->push_back(Symbol::skipStart());
            if (skipped->size() == 1) {
                result->push_back((*skipped)[0]);
            } else {
                result->push_back(Symbol::indirect(skipped));
            }
            continue;
        }

        ProductionPtr matched = doGenerate2(writer->leafAt(wi), reader->leafAt(ri), m, m2);
        result->insert(result->end(), matched->rbegin(), matched->rend());
        fieldOrder.push_back(ri);
        unmatchedReaderFields.erase(ri);
    }

    /*
     * The remaining reader fields have no writer counterpart: each is read
     * from the binary encoding of its default value, bracketed by the
     * default start/end actions.
     */
    for (const auto ri : unmatchedReaderFields) {
        NodePtr fieldNode = reader->leafAt(ri);
        fieldOrder.push_back(ri);

        if (fieldNode->type() == AVRO_SYMBOLIC) {
            fieldNode = resolveSymbol(fieldNode);
        }

        shared_ptr<vector<uint8_t>> defaultBinary =
            getAvroBinary(reader->defaultValueAt(ri));
        result->push_back(Symbol::defaultStartAction(defaultBinary));

        // The default is decoded with the field's schema resolved against itself.
        auto known = m.find(NodePair(fieldNode, fieldNode));
        ProductionPtr selfResolved =
            known == m.end() ? doGenerate2(fieldNode, fieldNode, m, m2) : known->second;
        result->insert(result->end(), selfResolved->rbegin(), selfResolved->rend());

        result->push_back(Symbol::defaultEndAction());
    }

    reverse(result->begin(), result->end());
    result->push_back(Symbol::sizeListAction(fieldOrder));
    result->push_back(Symbol::recordAction());

    return result;
}
|
||||
|
||||
/**
 * Resolves a writer union against the reader node: every writer branch is
 * resolved separately and the results become the alternatives of a single
 * symbol, followed by the writer-union action.
 */
ProductionPtr ResolvingGrammarGenerator::resolveUnion(
    const NodePtr &writer, const NodePtr &reader,
    map<NodePair, ProductionPtr> &m,
    map<NodePtr, ProductionPtr> &m2) {
    const size_t branchCount = writer->leaves();

    vector<ProductionPtr> alternatives;
    alternatives.reserve(branchCount);
    for (size_t branch = 0; branch < branchCount; ++branch) {
        alternatives.push_back(doGenerate2(writer->leafAt(branch), reader, m, m2));
    }

    ProductionPtr result = make_shared<Production>();
    result->push_back(Symbol::alternative(alternatives));
    result->push_back(Symbol::writerUnionAction());
    return result;
}
|
||||
|
||||
/**
 * Builds the production that resolves data written with node w so it can be
 * read as node r. Symbolic nodes are resolved first.
 *
 * - Same type on both sides: primitives map to their symbol; fixed, record
 *   and enum additionally require the writer name to equal or be aliased by
 *   the reader name (fixed also requires equal sizes); arrays, maps and
 *   unions recurse.
 * - Writer is a union: handled by resolveUnion().
 * - Otherwise: numeric promotions towards a long/float/double reader, or
 *   selection of the best branch of a reader union.
 *
 * Any combination that cannot be resolved yields a single error symbol, so
 * the failure is reported only if such data is actually decoded.
 *
 * @param m  cache of resolving productions keyed by (writer, reader) nodes.
 * @param m2 cache of writer-only productions keyed by writer node.
 * @throws Exception("Unknown node type") for a node type that neither switch
 *         recognises.
 */
ProductionPtr ResolvingGrammarGenerator::doGenerate2(
    const NodePtr &w, const NodePtr &r,
    map<NodePair, ProductionPtr> &m,
    map<NodePtr, ProductionPtr> &m2) {
    const NodePtr writer = w->type() == AVRO_SYMBOLIC ? resolveSymbol(w) : w;
    const NodePtr reader = r->type() == AVRO_SYMBOLIC ? resolveSymbol(r) : r;
    Type writerType = writer->type();
    Type readerType = reader->type();

    if (writerType == readerType) {
        switch (writerType) {
            case AVRO_NULL:
                return make_shared<Production>(1, Symbol::nullSymbol());
            case AVRO_BOOL:
                return make_shared<Production>(1, Symbol::boolSymbol());
            case AVRO_INT:
                return make_shared<Production>(1, Symbol::intSymbol());
            case AVRO_LONG:
                return make_shared<Production>(1, Symbol::longSymbol());
            case AVRO_FLOAT:
                return make_shared<Production>(1, Symbol::floatSymbol());
            case AVRO_DOUBLE:
                return make_shared<Production>(1, Symbol::doubleSymbol());
            case AVRO_STRING:
                return make_shared<Production>(1, Symbol::stringSymbol());
            case AVRO_BYTES:
                return make_shared<Production>(1, Symbol::bytesSymbol());
            case AVRO_FIXED:
                if (writer->name().equalOrAliasedBy(reader->name()) && writer->fixedSize() == reader->fixedSize()) {
                    ProductionPtr result = make_shared<Production>();
                    result->push_back(Symbol::sizeCheckSymbol(reader->fixedSize()));
                    result->push_back(Symbol::fixedSymbol());
                    m[make_pair(writer, reader)] = result;
                    return result;
                }
                break;
            case AVRO_RECORD:
                if (writer->name().equalOrAliasedBy(reader->name())) {
                    const pair<NodePtr, NodePtr> key(writer, reader);
                    map<NodePair, ProductionPtr>::const_iterator kp = m.find(key);
                    if (kp != m.end()) {
                        // A null entry means this pair is still being
                        // resolved (see below); emit a placeholder for it.
                        return (kp->second) ? kp->second : make_shared<Production>(1, Symbol::placeholder(key));
                    }
                    // Register the pair with a null production before
                    // recursing so nested references to it are detected.
                    m[key] = ProductionPtr();
                    ProductionPtr result = resolveRecords(writer, reader, m, m2);
                    m[key] = result;
                    return make_shared<Production>(1, Symbol::indirect(result));
                }
                break;

            case AVRO_ENUM:
                if (writer->name().equalOrAliasedBy(reader->name())) {
                    ProductionPtr result = make_shared<Production>();
                    result->push_back(Symbol::enumAdjustSymbol(writer, reader));
                    result->push_back(Symbol::enumSymbol());
                    m[make_pair(writer, reader)] = result;
                    return result;
                }
                break;
            case AVRO_ARRAY: {
                ProductionPtr p = getWriterProduction(writer->leafAt(0), m2);
                ProductionPtr p2 = doGenerate2(writer->leafAt(0), reader->leafAt(0), m, m2);
                ProductionPtr result = make_shared<Production>();
                result->push_back(Symbol::arrayEndSymbol());
                result->push_back(Symbol::repeater(p2, p, true));
                result->push_back(Symbol::arrayStartSymbol());
                return result;
            }
            case AVRO_MAP: {
                // Both the resolving and the writer-only value productions
                // are copied so the string (key) symbol can be appended
                // without modifying the shared originals.
                ProductionPtr pp =
                    doGenerate2(writer->leafAt(1), reader->leafAt(1), m, m2);
                ProductionPtr v = make_shared<Production>(*pp);
                v->push_back(Symbol::stringSymbol());

                ProductionPtr pp2 = getWriterProduction(writer->leafAt(1), m2);
                ProductionPtr v2 = make_shared<Production>(*pp2);

                v2->push_back(Symbol::stringSymbol());

                ProductionPtr result = make_shared<Production>();
                result->push_back(Symbol::mapEndSymbol());
                result->push_back(Symbol::repeater(v, v2, false));
                result->push_back(Symbol::mapStartSymbol());
                return result;
            }
            case AVRO_UNION:
                return resolveUnion(writer, reader, m, m2);
            case AVRO_SYMBOLIC: {
                shared_ptr<NodeSymbolic> w2 =
                    static_pointer_cast<NodeSymbolic>(writer);
                shared_ptr<NodeSymbolic> r2 =
                    static_pointer_cast<NodeSymbolic>(reader);
                NodePair p(w2->getNode(), r2->getNode());
                auto it = m.find(p);
                if (it != m.end() && it->second) {
                    return it->second;
                } else {
                    m[p] = ProductionPtr();
                    return make_shared<Production>(1, Symbol::placeholder(p));
                }
            }
            default:
                throw Exception("Unknown node type");
        }
    } else if (writerType == AVRO_UNION) {
        return resolveUnion(writer, reader, m, m2);
    } else {
        switch (readerType) {
            case AVRO_LONG:
                if (writerType == AVRO_INT) {
                    return make_shared<Production>(1,
                                                   Symbol::resolveSymbol(Symbol::Kind::Int, Symbol::Kind::Long));
                }
                break;
            case AVRO_FLOAT:
                if (writerType == AVRO_INT || writerType == AVRO_LONG) {
                    return make_shared<Production>(1,
                                                   Symbol::resolveSymbol(writerType == AVRO_INT ? Symbol::Kind::Int : Symbol::Kind::Long, Symbol::Kind::Float));
                }
                break;
            case AVRO_DOUBLE:
                if (writerType == AVRO_INT || writerType == AVRO_LONG
                    || writerType == AVRO_FLOAT) {
                    return make_shared<Production>(1,
                                                   Symbol::resolveSymbol(writerType == AVRO_INT ? Symbol::Kind::Int : writerType == AVRO_LONG ? Symbol::Kind::Long
                                                                                                                                              : Symbol::Kind::Float,
                                                                         Symbol::Kind::Double));
                }
                break;

            case AVRO_UNION: {
                auto j = bestBranch(writer, reader);
                if (j) {
                    ProductionPtr p = doGenerate2(writer, reader->leafAt(*j), m, m2);
                    ProductionPtr result = make_shared<Production>();
                    result->push_back(Symbol::unionAdjustSymbol(*j, p));
                    result->push_back(Symbol::unionSymbol());
                    return result;
                }
            } break;
            case AVRO_NULL:
            case AVRO_BOOL:
            case AVRO_INT:
            case AVRO_STRING:
            case AVRO_BYTES:
            case AVRO_ENUM:
            case AVRO_ARRAY:
            case AVRO_MAP:
            case AVRO_RECORD:
            // A fixed reader facing a different writer type is an ordinary
            // mismatch like the types above: fall through to the error
            // symbol rather than rejecting the schema pair with "Unknown
            // node type".
            case AVRO_FIXED:
                break;
            default:
                throw Exception("Unknown node type");
        }
    }
    return make_shared<Production>(1, Symbol::error(writer, reader));
}
|
||||
|
||||
class ResolvingDecoderHandler {
|
||||
shared_ptr<vector<uint8_t>> defaultData_;
|
||||
unique_ptr<InputStream> inp_;
|
||||
DecoderPtr backup_;
|
||||
DecoderPtr &base_;
|
||||
const DecoderPtr binDecoder;
|
||||
|
||||
public:
|
||||
explicit ResolvingDecoderHandler(DecoderPtr &base) : base_(base),
|
||||
binDecoder(binaryDecoder()) {}
|
||||
size_t handle(const Symbol &s) {
|
||||
switch (s.kind()) {
|
||||
case Symbol::Kind::WriterUnion:
|
||||
return base_->decodeUnionIndex();
|
||||
case Symbol::Kind::DefaultStart:
|
||||
defaultData_ = s.extra<shared_ptr<vector<uint8_t>>>();
|
||||
backup_ = base_;
|
||||
inp_ = memoryInputStream(&(*defaultData_)[0], defaultData_->size());
|
||||
base_ = binDecoder;
|
||||
base_->init(*inp_);
|
||||
return 0;
|
||||
case Symbol::Kind::DefaultEnd:
|
||||
base_ = backup_;
|
||||
backup_.reset();
|
||||
return 0;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void reset() {
|
||||
if (backup_ != nullptr) {
|
||||
base_ = backup_;
|
||||
backup_.reset();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Parser>
|
||||
class ResolvingDecoderImpl : public ResolvingDecoder {
|
||||
DecoderPtr base_;
|
||||
ResolvingDecoderHandler handler_;
|
||||
Parser parser_;
|
||||
|
||||
void init(InputStream &is) final;
|
||||
void decodeNull() final;
|
||||
bool decodeBool() final;
|
||||
int32_t decodeInt() final;
|
||||
int64_t decodeLong() final;
|
||||
float decodeFloat() final;
|
||||
double decodeDouble() final;
|
||||
void decodeString(string &value) final;
|
||||
void skipString() final;
|
||||
void decodeBytes(vector<uint8_t> &value) final;
|
||||
void skipBytes() final;
|
||||
void decodeFixed(size_t n, vector<uint8_t> &value) final;
|
||||
void skipFixed(size_t n) final;
|
||||
size_t decodeEnum() final;
|
||||
size_t arrayStart() final;
|
||||
size_t arrayNext() final;
|
||||
size_t skipArray() final;
|
||||
size_t mapStart() final;
|
||||
size_t mapNext() final;
|
||||
size_t skipMap() final;
|
||||
size_t decodeUnionIndex() final;
|
||||
const vector<size_t> &fieldOrder() final;
|
||||
void drain() final {
|
||||
parser_.processImplicitActions();
|
||||
base_->drain();
|
||||
}
|
||||
|
||||
public:
|
||||
ResolvingDecoderImpl(const ValidSchema &writer, const ValidSchema &reader,
|
||||
DecoderPtr base) : base_(std::move(base)),
|
||||
handler_(base_),
|
||||
parser_(ResolvingGrammarGenerator().generate(writer, reader),
|
||||
&(*base_), handler_) {
|
||||
}
|
||||
};
|
||||
|
||||
template<typename P>
|
||||
void ResolvingDecoderImpl<P>::init(InputStream &is) {
|
||||
handler_.reset();
|
||||
base_->init(is);
|
||||
parser_.reset();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ResolvingDecoderImpl<P>::decodeNull() {
|
||||
parser_.advance(Symbol::Kind::Null);
|
||||
base_->decodeNull();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
bool ResolvingDecoderImpl<P>::decodeBool() {
|
||||
parser_.advance(Symbol::Kind::Bool);
|
||||
return base_->decodeBool();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
int32_t ResolvingDecoderImpl<P>::decodeInt() {
|
||||
parser_.advance(Symbol::Kind::Int);
|
||||
return base_->decodeInt();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
int64_t ResolvingDecoderImpl<P>::decodeLong() {
|
||||
Symbol::Kind k = parser_.advance(Symbol::Kind::Long);
|
||||
return k == Symbol::Kind::Int ? base_->decodeInt() : base_->decodeLong();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
float ResolvingDecoderImpl<P>::decodeFloat() {
|
||||
Symbol::Kind k = parser_.advance(Symbol::Kind::Float);
|
||||
return k == Symbol::Kind::Int ? static_cast<float>(base_->decodeInt())
|
||||
: k == Symbol::Kind::Long ? static_cast<float>(base_->decodeLong())
|
||||
: base_->decodeFloat();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
double ResolvingDecoderImpl<P>::decodeDouble() {
|
||||
Symbol::Kind k = parser_.advance(Symbol::Kind::Double);
|
||||
return k == Symbol::Kind::Int ? static_cast<double>(base_->decodeInt())
|
||||
: k == Symbol::Kind::Long ? static_cast<double>(base_->decodeLong())
|
||||
: k == Symbol::Kind::Float ? base_->decodeFloat()
|
||||
: base_->decodeDouble();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ResolvingDecoderImpl<P>::decodeString(string &value) {
|
||||
parser_.advance(Symbol::Kind::String);
|
||||
base_->decodeString(value);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ResolvingDecoderImpl<P>::skipString() {
|
||||
parser_.advance(Symbol::Kind::String);
|
||||
base_->skipString();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ResolvingDecoderImpl<P>::decodeBytes(vector<uint8_t> &value) {
|
||||
parser_.advance(Symbol::Kind::Bytes);
|
||||
base_->decodeBytes(value);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ResolvingDecoderImpl<P>::skipBytes() {
|
||||
parser_.advance(Symbol::Kind::Bytes);
|
||||
base_->skipBytes();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ResolvingDecoderImpl<P>::decodeFixed(size_t n, vector<uint8_t> &value) {
|
||||
parser_.advance(Symbol::Kind::Fixed);
|
||||
parser_.assertSize(n);
|
||||
return base_->decodeFixed(n, value);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ResolvingDecoderImpl<P>::skipFixed(size_t n) {
|
||||
parser_.advance(Symbol::Kind::Fixed);
|
||||
parser_.assertSize(n);
|
||||
base_->skipFixed(n);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ResolvingDecoderImpl<P>::decodeEnum() {
|
||||
parser_.advance(Symbol::Kind::Enum);
|
||||
size_t n = base_->decodeEnum();
|
||||
return parser_.enumAdjust(n);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ResolvingDecoderImpl<P>::arrayStart() {
|
||||
parser_.advance(Symbol::Kind::ArrayStart);
|
||||
size_t result = base_->arrayStart();
|
||||
parser_.pushRepeatCount(result);
|
||||
if (result == 0) {
|
||||
parser_.popRepeater();
|
||||
parser_.advance(Symbol::Kind::ArrayEnd);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ResolvingDecoderImpl<P>::arrayNext() {
|
||||
parser_.processImplicitActions();
|
||||
size_t result = base_->arrayNext();
|
||||
parser_.nextRepeatCount(result);
|
||||
if (result == 0) {
|
||||
parser_.popRepeater();
|
||||
parser_.advance(Symbol::Kind::ArrayEnd);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ResolvingDecoderImpl<P>::skipArray() {
|
||||
parser_.advance(Symbol::Kind::ArrayStart);
|
||||
size_t n = base_->skipArray();
|
||||
if (n == 0) {
|
||||
parser_.pop();
|
||||
} else {
|
||||
parser_.pushRepeatCount(n);
|
||||
parser_.skip(*base_);
|
||||
}
|
||||
parser_.advance(Symbol::Kind::ArrayEnd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ResolvingDecoderImpl<P>::mapStart() {
|
||||
parser_.advance(Symbol::Kind::MapStart);
|
||||
size_t result = base_->mapStart();
|
||||
parser_.pushRepeatCount(result);
|
||||
if (result == 0) {
|
||||
parser_.popRepeater();
|
||||
parser_.advance(Symbol::Kind::MapEnd);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ResolvingDecoderImpl<P>::mapNext() {
|
||||
parser_.processImplicitActions();
|
||||
size_t result = base_->mapNext();
|
||||
parser_.nextRepeatCount(result);
|
||||
if (result == 0) {
|
||||
parser_.popRepeater();
|
||||
parser_.advance(Symbol::Kind::MapEnd);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ResolvingDecoderImpl<P>::skipMap() {
|
||||
parser_.advance(Symbol::Kind::MapStart);
|
||||
size_t n = base_->skipMap();
|
||||
if (n == 0) {
|
||||
parser_.pop();
|
||||
} else {
|
||||
parser_.pushRepeatCount(n);
|
||||
parser_.skip(*base_);
|
||||
}
|
||||
parser_.advance(Symbol::Kind::MapEnd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ResolvingDecoderImpl<P>::decodeUnionIndex() {
|
||||
parser_.advance(Symbol::Kind::Union);
|
||||
return parser_.unionAdjust();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
const vector<size_t> &ResolvingDecoderImpl<P>::fieldOrder() {
|
||||
parser_.advance(Symbol::Kind::Record);
|
||||
return parser_.sizeList();
|
||||
}
|
||||
|
||||
} // namespace parsing
|
||||
|
||||
/**
 * Creates a resolving decoder for the given writer and reader schemas that
 * reads its input through `base`.
 */
ResolvingDecoderPtr resolvingDecoder(const ValidSchema &writer,
                                     const ValidSchema &reader, const DecoderPtr &base) {
    using Parser = parsing::SimpleParser<parsing::ResolvingDecoderHandler>;
    using Impl = parsing::ResolvingDecoderImpl<Parser>;
    return make_shared<Impl>(writer, reader, base);
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "Symbol.hh"
|
||||
|
||||
namespace avro {
|
||||
namespace parsing {
|
||||
|
||||
using std::ostringstream;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
// Printable names for Symbol::Kind. Symbol::toString() indexes this table by
// the enumerator's numeric value, so the order must match the enum exactly.
const char *Symbol::stringValues[] = {
    // terminals
    "TerminalLow", "Null", "Bool", "Int", "Long", "Float", "Double", "String", "Bytes",
    "ArrayStart", "ArrayEnd", "MapStart", "MapEnd", "Fixed", "Enum", "Union", "TerminalHigh",
    // non-terminals and explicit actions
    "SizeCheck", "NameList", "Root", "Repeater", "Alternative", "Placeholder", "Indirect",
    "Symbolic", "EnumAdjust", "UnionAdjust", "SkipStart", "Resolve",
    // implicit actions
    "ImplicitActionLow", "RecordStart", "RecordEnd", "Field", "Record", "SizeList",
    "WriterUnion", "DefaultStart", "DefaultEnd", "ImplicitActionHigh",
    "Error"};
|
||||
|
||||
/**
 * Builds the EnumAdjust symbol that maps writer enum ordinals to reader enum
 * ordinals.
 *
 * The payload is a pair (adj, err):
 *  - adj[i] is the reader ordinal of the writer's i-th name when the reader
 *    also has that name; otherwise it is -(k + 1), where k is the index in
 *    err of the unmatched name.
 *  - err lists the writer names that the reader lacks, in writer order.
 */
Symbol Symbol::enumAdjustSymbol(const NodePtr &writer, const NodePtr &reader) {
    const size_t rc = reader->names();
    vector<string> rs;
    rs.reserve(rc);
    for (size_t i = 0; i < rc; ++i) {
        rs.push_back(reader->nameAt(i));
    }

    const size_t wc = writer->names();
    vector<int> adj; // enums are encoded as ints
    adj.reserve(wc);

    vector<string> err;

    for (size_t i = 0; i < wc; ++i) {
        const string &s = writer->nameAt(i);
        const auto it = std::find(rs.begin(), rs.end(), s);
        if (it == rs.end()) {
            err.push_back(s);
            // Convert to int first, then negate: negating the unsigned size
            // and narrowing afterwards is not portable.
            adj.push_back(-static_cast<int>(err.size()));
        } else {
            adj.push_back(static_cast<int>(it - rs.begin()));
        }
    }
    return Symbol(Kind::EnumAdjust, std::make_pair(std::move(adj), std::move(err)));
}
|
||||
|
||||
/**
 * Builds an Error symbol whose payload is a message containing the JSON of
 * the writer and reader nodes that could not be resolved.
 */
Symbol Symbol::error(const NodePtr &writer, const NodePtr &reader) {
    ostringstream message;
    message << "Cannot resolve: " << std::endl;
    writer->printJson(message, 0);
    message << std::endl;
    message << "with" << std::endl;
    reader->printJson(message, 0);
    return Symbol(Kind::Error, message.str());
}
|
||||
|
||||
} // namespace parsing
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,793 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_parsing_Symbol_hh__
|
||||
#define avro_parsing_Symbol_hh__
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <stack>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "Decoder.hh"
|
||||
#include "Exception.hh"
|
||||
#include "Node.hh"
|
||||
|
||||
#include <boost/any.hpp>
|
||||
#include <boost/tuple/tuple.hpp>
|
||||
|
||||
namespace avro {
|
||||
namespace parsing {
|
||||
|
||||
class Symbol;
|
||||
|
||||
typedef std::vector<Symbol> Production;
|
||||
typedef std::shared_ptr<Production> ProductionPtr;
|
||||
typedef boost::tuple<std::stack<ssize_t>, bool, ProductionPtr, ProductionPtr> RepeaterInfo;
|
||||
typedef boost::tuple<ProductionPtr, ProductionPtr> RootInfo;
|
||||
|
||||
class Symbol {
|
||||
public:
|
||||
enum class Kind {
|
||||
TerminalLow, // extra has nothing
|
||||
Null,
|
||||
Bool,
|
||||
Int,
|
||||
Long,
|
||||
Float,
|
||||
Double,
|
||||
String,
|
||||
Bytes,
|
||||
ArrayStart,
|
||||
ArrayEnd,
|
||||
MapStart,
|
||||
MapEnd,
|
||||
Fixed,
|
||||
Enum,
|
||||
Union,
|
||||
TerminalHigh,
|
||||
SizeCheck, // Extra has size
|
||||
NameList, // Extra has a vector<string>
|
||||
Root, // Root for a schema, extra is Symbol
|
||||
Repeater, // Array or Map, extra is symbol
|
||||
Alternative, // One of many (union), extra is Union
|
||||
Placeholder, // To be fixed up later.
|
||||
Indirect, // extra is shared_ptr<Production>
|
||||
Symbolic, // extra is weal_ptr<Production>
|
||||
EnumAdjust,
|
||||
UnionAdjust,
|
||||
SkipStart,
|
||||
Resolve,
|
||||
|
||||
ImplicitActionLow,
|
||||
RecordStart,
|
||||
RecordEnd,
|
||||
Field, // extra is string
|
||||
Record,
|
||||
SizeList,
|
||||
WriterUnion,
|
||||
DefaultStart, // extra has default value in Avro binary encoding
|
||||
DefaultEnd,
|
||||
ImplicitActionHigh,
|
||||
Error
|
||||
};
|
||||
|
||||
private:
|
||||
Kind kind_;
|
||||
boost::any extra_;
|
||||
|
||||
explicit Symbol(Kind k) : kind_(k) {}
|
||||
template <typename T>
|
||||
Symbol(Kind k, T t) : kind_(k), extra_(t) {}
|
||||
|
||||
public:
|
||||
Kind kind() const {
|
||||
return kind_;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T extra() const {
|
||||
return boost::any_cast<T>(extra_);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T* extrap() {
|
||||
return boost::any_cast<T>(&extra_);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const T* extrap() const {
|
||||
return boost::any_cast<T>(&extra_);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void extra(const T& t) {
|
||||
extra_ = t;
|
||||
}
|
||||
|
||||
bool isTerminal() const {
|
||||
return kind_ > Kind::TerminalLow && kind_ < Kind::TerminalHigh;
|
||||
}
|
||||
|
||||
bool isImplicitAction() const {
|
||||
return kind_ > Kind::ImplicitActionLow && kind_ < Kind::ImplicitActionHigh;
|
||||
}
|
||||
|
||||
static const char* stringValues[];
|
||||
static const char* toString(Kind k) {
|
||||
return stringValues[static_cast<size_t>(k)];
|
||||
}
|
||||
|
||||
static Symbol rootSymbol(ProductionPtr& s) {
|
||||
return Symbol(Kind::Root, RootInfo(s, std::make_shared<Production>()));
|
||||
}
|
||||
|
||||
static Symbol rootSymbol(const ProductionPtr& main, const ProductionPtr& backup) {
|
||||
return Symbol(Kind::Root, RootInfo(main, backup));
|
||||
}
|
||||
|
||||
static Symbol nullSymbol() {
|
||||
return Symbol(Kind::Null);
|
||||
}
|
||||
|
||||
static Symbol boolSymbol() {
|
||||
return Symbol(Kind::Bool);
|
||||
}
|
||||
|
||||
static Symbol intSymbol() {
|
||||
return Symbol(Kind::Int);
|
||||
}
|
||||
|
||||
static Symbol longSymbol() {
|
||||
return Symbol(Kind::Long);
|
||||
}
|
||||
|
||||
static Symbol floatSymbol() {
|
||||
return Symbol(Kind::Float);
|
||||
}
|
||||
|
||||
static Symbol doubleSymbol() {
|
||||
return Symbol(Kind::Double);
|
||||
}
|
||||
|
||||
static Symbol stringSymbol() {
|
||||
return Symbol(Kind::String);
|
||||
}
|
||||
|
||||
static Symbol bytesSymbol() {
|
||||
return Symbol(Kind::Bytes);
|
||||
}
|
||||
|
||||
static Symbol sizeCheckSymbol(size_t s) {
|
||||
return Symbol(Kind::SizeCheck, s);
|
||||
}
|
||||
|
||||
static Symbol fixedSymbol() {
|
||||
return Symbol(Kind::Fixed);
|
||||
}
|
||||
|
||||
static Symbol enumSymbol() {
|
||||
return Symbol(Kind::Enum);
|
||||
}
|
||||
|
||||
static Symbol arrayStartSymbol() {
|
||||
return Symbol(Kind::ArrayStart);
|
||||
}
|
||||
|
||||
static Symbol arrayEndSymbol() {
|
||||
return Symbol(Kind::ArrayEnd);
|
||||
}
|
||||
|
||||
static Symbol mapStartSymbol() {
|
||||
return Symbol(Kind::MapStart);
|
||||
}
|
||||
|
||||
static Symbol mapEndSymbol() {
|
||||
return Symbol(Kind::MapEnd);
|
||||
}
|
||||
|
||||
static Symbol repeater(const ProductionPtr& p, bool isArray) {
|
||||
return repeater(p, p, isArray);
|
||||
}
|
||||
|
||||
static Symbol repeater(const ProductionPtr& read, const ProductionPtr& skip, bool isArray) {
|
||||
std::stack<ssize_t> s;
|
||||
return Symbol(Kind::Repeater, RepeaterInfo(s, isArray, read, skip));
|
||||
}
|
||||
|
||||
static Symbol defaultStartAction(std::shared_ptr<std::vector<uint8_t>> bb) {
|
||||
return Symbol(Kind::DefaultStart, std::move(bb));
|
||||
}
|
||||
|
||||
static Symbol defaultEndAction() {
|
||||
return Symbol(Kind::DefaultEnd);
|
||||
}
|
||||
|
||||
static Symbol alternative(const std::vector<ProductionPtr>& branches) {
|
||||
return Symbol(Symbol::Kind::Alternative, branches);
|
||||
}
|
||||
|
||||
static Symbol unionSymbol() {
|
||||
return Symbol(Kind::Union);
|
||||
}
|
||||
|
||||
static Symbol recordStartSymbol() {
|
||||
return Symbol(Kind::RecordStart);
|
||||
}
|
||||
|
||||
static Symbol recordEndSymbol() {
|
||||
return Symbol(Kind::RecordEnd);
|
||||
}
|
||||
|
||||
static Symbol fieldSymbol(const std::string& name) {
|
||||
return Symbol(Kind::Field, name);
|
||||
}
|
||||
|
||||
static Symbol writerUnionAction() {
|
||||
return Symbol(Kind::WriterUnion);
|
||||
}
|
||||
|
||||
static Symbol nameListSymbol(const std::vector<std::string>& v) {
|
||||
return Symbol(Kind::NameList, v);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static Symbol placeholder(const T& n) {
|
||||
return Symbol(Kind::Placeholder, n);
|
||||
}
|
||||
|
||||
static Symbol indirect(const ProductionPtr& p) {
|
||||
return Symbol(Kind::Indirect, p);
|
||||
}
|
||||
|
||||
static Symbol symbolic(const std::weak_ptr<Production>& p) {
|
||||
return Symbol(Kind::Symbolic, p);
|
||||
}
|
||||
|
||||
static Symbol enumAdjustSymbol(const NodePtr& writer, const NodePtr& reader);
|
||||
|
||||
static Symbol unionAdjustSymbol(size_t branch, const ProductionPtr& p) {
|
||||
return Symbol(Kind::UnionAdjust, std::make_pair(branch, p));
|
||||
}
|
||||
|
||||
static Symbol sizeListAction(std::vector<size_t> order) {
|
||||
return Symbol(Kind::SizeList, std::move(order));
|
||||
}
|
||||
|
||||
static Symbol recordAction() {
|
||||
return Symbol(Kind::Record);
|
||||
}
|
||||
|
||||
static Symbol error(const NodePtr& writer, const NodePtr& reader);
|
||||
|
||||
static Symbol resolveSymbol(Kind w, Kind r) {
|
||||
return Symbol(Kind::Resolve, std::make_pair(w, r));
|
||||
}
|
||||
|
||||
static Symbol skipStart() {
|
||||
return Symbol(Kind::SkipStart);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Recursively replaces all placeholders in the production with the
|
||||
* corresponding values.
|
||||
*/
|
||||
template <typename T>
|
||||
void fixup(const ProductionPtr& p, const std::map<T, ProductionPtr>& m) {
|
||||
std::set<ProductionPtr> seen;
|
||||
for (auto& it : *p) {
|
||||
fixup(it, m, seen);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively replaces all placeholders in the symbol with the values with the
|
||||
* corresponding values.
|
||||
*/
|
||||
template <typename T>
|
||||
void fixup_internal(const ProductionPtr& p,
|
||||
const std::map<T, ProductionPtr>& m,
|
||||
std::set<ProductionPtr>& seen) {
|
||||
if (seen.find(p) == seen.end()) {
|
||||
seen.insert(p);
|
||||
for (auto& it : *p) {
|
||||
fixup(it, m, seen);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void fixup(Symbol& s, const std::map<T, ProductionPtr>& m, std::set<ProductionPtr>& seen) {
|
||||
switch (s.kind()) {
|
||||
case Symbol::Kind::Indirect:
|
||||
fixup_internal(s.extra<ProductionPtr>(), m, seen);
|
||||
break;
|
||||
case Symbol::Kind::Alternative: {
|
||||
const std::vector<ProductionPtr>* vv = s.extrap<std::vector<ProductionPtr>>();
|
||||
for (const auto& it : *vv) {
|
||||
fixup_internal(it, m, seen);
|
||||
}
|
||||
} break;
|
||||
case Symbol::Kind::Repeater: {
|
||||
const RepeaterInfo& ri = *s.extrap<RepeaterInfo>();
|
||||
fixup_internal(boost::tuples::get<2>(ri), m, seen);
|
||||
fixup_internal(boost::tuples::get<3>(ri), m, seen);
|
||||
} break;
|
||||
case Symbol::Kind::Placeholder: {
|
||||
typename std::map<T, std::shared_ptr<Production>>::const_iterator it =
|
||||
m.find(s.extra<T>());
|
||||
if (it == m.end()) {
|
||||
throw Exception("Placeholder symbol cannot be resolved");
|
||||
}
|
||||
s = Symbol::symbolic(std::weak_ptr<Production>(it->second));
|
||||
} break;
|
||||
case Symbol::Kind::UnionAdjust:
|
||||
fixup_internal(s.extrap<std::pair<size_t, ProductionPtr>>()->second, m, seen);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Handler>
|
||||
class SimpleParser {
|
||||
Decoder* decoder_;
|
||||
Handler& handler_;
|
||||
/*
|
||||
* parsingStack always has root at the bottom of it.
|
||||
* So it is safe to call top() on it.
|
||||
*/
|
||||
std::stack<Symbol> parsingStack;
|
||||
|
||||
/**
 * Throws an Exception reporting that `expected` was required by the schema
 * but `actual` was requested.
 */
static void throwMismatch(Symbol::Kind actual, Symbol::Kind expected) {
    std::ostringstream oss;
    oss << "Invalid operation. Schema requires: ";
    oss << Symbol::toString(expected);
    oss << ", got: ";
    oss << Symbol::toString(actual);
    throw Exception(oss.str());
}
|
||||
|
||||
/** Throws (via throwMismatch) unless the two kinds are equal. */
static void assertMatch(Symbol::Kind actual, Symbol::Kind expected) {
    if (expected == actual) {
        return;
    }
    throwMismatch(actual, expected);
}
|
||||
|
||||
void append(const ProductionPtr& ss) {
|
||||
for (Production::const_iterator it = ss->begin(); it != ss->end(); ++it) {
|
||||
parsingStack.push(*it);
|
||||
}
|
||||
}
|
||||
|
||||
size_t popSize() {
|
||||
const Symbol& s = parsingStack.top();
|
||||
assertMatch(Symbol::Kind::SizeCheck, s.kind());
|
||||
auto result = s.extra<size_t>();
|
||||
parsingStack.pop();
|
||||
return result;
|
||||
}
|
||||
|
||||
static void assertLessThan(size_t n, size_t s) {
|
||||
if (n >= s) {
|
||||
std::ostringstream oss;
|
||||
oss << "Size max value. Upper bound: " << s << " found " << n;
|
||||
throw Exception(oss.str());
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
/**
 * Advances the parser until a symbol of kind `k` is consumed.
 *
 * Non-terminals on top of the stack are expanded, implicit actions are
 * passed to the handler, and SkipStart triggers a skip on the decoder.
 * For a Resolve symbol whose second kind equals `k`, the first kind of the
 * pair is returned instead of `k`.
 *
 * @return `k`, or the first kind of a matching Resolve symbol.
 * @throws Exception on a terminal mismatch, an Error symbol, an exhausted
 *         repeater, or a symbol that cannot be handled here.
 */
Symbol::Kind advance(Symbol::Kind k) {
    for (;;) {
        Symbol& s = parsingStack.top();
        const Symbol::Kind current = s.kind();
        if (current == k) {
            parsingStack.pop();
            return k;
        }
        if (s.isTerminal()) {
            throwMismatch(k, current);
        }
        switch (current) {
            case Symbol::Kind::Root:
                // The root itself stays on the stack; only its main
                // production is expanded.
                append(boost::tuples::get<0>(*s.extrap<RootInfo>()));
                continue;
            case Symbol::Kind::Indirect: {
                ProductionPtr production = s.extra<ProductionPtr>();
                parsingStack.pop();
                append(production);
                continue;
            }
            case Symbol::Kind::Symbolic: {
                ProductionPtr production(s.extra<std::weak_ptr<Production>>());
                parsingStack.pop();
                append(production);
                continue;
            }
            case Symbol::Kind::Repeater: {
                RepeaterInfo* info = s.extrap<RepeaterInfo>();
                std::stack<ssize_t>& counts = boost::tuples::get<0>(*info);
                if (counts.empty()) {
                    throw Exception("Empty item count stack in repeater advance");
                }
                if (counts.top() == 0) {
                    throw Exception("Zero item count in repeater advance");
                }
                --counts.top();
                append(boost::tuples::get<2>(*info));  // read production
                continue;
            }
            case Symbol::Kind::Error:
                throw Exception(s.extra<std::string>());
            case Symbol::Kind::Resolve: {
                typedef std::pair<Symbol::Kind, Symbol::Kind> KindPair;
                const KindPair* kinds = s.extrap<KindPair>();
                assertMatch(kinds->second, k);
                const Symbol::Kind resolved = kinds->first;
                parsingStack.pop();
                return resolved;
            }
            case Symbol::Kind::SkipStart:
                parsingStack.pop();
                skip(*decoder_);
                break;
            default: {
                if (!s.isImplicitAction()) {
                    std::ostringstream oss;
                    oss << "Encountered " << Symbol::toString(s.kind())
                        << " while looking for " << Symbol::toString(k);
                    throw Exception(oss.str());
                }
                const size_t n = handler_.handle(s);
                // Read the kind before the pop invalidates `s`.
                const bool isWriterUnion = s.kind() == Symbol::Kind::WriterUnion;
                parsingStack.pop();
                if (isWriterUnion) {
                    selectBranch(n);
                }
                break;
            }
        }
    }
}
|
||||
|
||||
/**
 * Consumes symbols from the parsing stack, reading and discarding the
 * matching data from `d`, until the stack is smaller than it was on entry.
 *
 * @throws Exception if the stack is empty on entry, a repeater has no item
 *         count, or a symbol that cannot be skipped is encountered.
 */
void skip(Decoder& d) {
    const size_t entryDepth = parsingStack.size();
    if (entryDepth == 0) {
        throw Exception("Nothing to skip!");
    }
    while (parsingStack.size() >= entryDepth) {
        Symbol& t = parsingStack.top();
        // A `break` out of the switch falls through to the pop at the bottom
        // of the loop; a `continue` leaves the stack as the case arranged it.
        switch (t.kind()) {
            case Symbol::Kind::Null:
                d.decodeNull();
                break;
            case Symbol::Kind::Bool:
                d.decodeBool();
                break;
            case Symbol::Kind::Int:
                d.decodeInt();
                break;
            case Symbol::Kind::Long:
                d.decodeLong();
                break;
            case Symbol::Kind::Float:
                d.decodeFloat();
                break;
            case Symbol::Kind::Double:
                d.decodeDouble();
                break;
            case Symbol::Kind::String:
                d.skipString();
                break;
            case Symbol::Kind::Bytes:
                d.skipBytes();
                break;
            case Symbol::Kind::ArrayStart:
            case Symbol::Kind::MapStart: {
                // Read the kind before the pop invalidates `t`.
                const bool isArray = t.kind() == Symbol::Kind::ArrayStart;
                parsingStack.pop();
                const size_t n = isArray ? d.skipArray() : d.skipMap();
                processImplicitActions();
                Symbol& repeater = parsingStack.top();
                assertMatch(Symbol::Kind::Repeater, repeater.kind());
                if (n == 0) {
                    break;  // nothing left: the repeater is popped below
                }
                boost::tuples::get<0>(*repeater.extrap<RepeaterInfo>()).push(n);
                continue;
            }
            case Symbol::Kind::ArrayEnd:
            case Symbol::Kind::MapEnd:
                break;
            case Symbol::Kind::Fixed: {
                parsingStack.pop();
                // The symbol now on top carries the fixed size; it is popped
                // below.
                d.decodeFixed(parsingStack.top().extra<size_t>());
                break;
            }
            case Symbol::Kind::Enum:
                parsingStack.pop();
                d.decodeEnum();
                break;
            case Symbol::Kind::Union: {
                parsingStack.pop();
                const size_t branch = d.decodeUnionIndex();
                selectBranch(branch);
                continue;
            }
            case Symbol::Kind::Repeater: {
                RepeaterInfo* info = t.extrap<RepeaterInfo>();
                std::stack<ssize_t>& counts = boost::tuples::get<0>(*info);
                if (counts.empty()) {
                    throw Exception("Empty item count stack in repeater skip");
                }
                ssize_t& remaining = counts.top();
                if (remaining == 0) {
                    // Current block exhausted: ask the decoder for the next.
                    remaining = boost::tuples::get<1>(*info) ? d.arrayNext() : d.mapNext();
                }
                if (remaining == 0) {
                    counts.pop();
                    break;
                }
                --remaining;
                append(boost::tuples::get<3>(*info));  // skip production
                continue;
            }
            case Symbol::Kind::Indirect: {
                ProductionPtr production = t.extra<ProductionPtr>();
                parsingStack.pop();
                append(production);
                continue;
            }
            case Symbol::Kind::Symbolic: {
                ProductionPtr production(t.extra<std::weak_ptr<Production>>());
                parsingStack.pop();
                append(production);
                continue;
            }
            default: {
                std::ostringstream oss;
                oss << "Don't know how to skip " << Symbol::toString(t.kind());
                throw Exception(oss.str());
            }
        }
        parsingStack.pop();
    }
}
|
||||
|
||||
void assertSize(size_t n) {
|
||||
size_t s = popSize();
|
||||
if (s != n) {
|
||||
std::ostringstream oss;
|
||||
oss << "Incorrect size. Expected: " << s << " found " << n;
|
||||
throw Exception(oss.str());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Pops the SizeCheck symbol on top of the stack and throws unless `n` is
 * strictly less than its size.
 */
void assertLessThanSize(size_t n) {
    const size_t bound = popSize();
    assertLessThan(n, bound);
}
|
||||
|
||||
size_t enumAdjust(size_t n) {
|
||||
const Symbol& s = parsingStack.top();
|
||||
assertMatch(Symbol::Kind::EnumAdjust, s.kind());
|
||||
const auto* v = s.extrap<std::pair<std::vector<int>, std::vector<std::string>>>();
|
||||
assertLessThan(n, v->first.size());
|
||||
|
||||
int result = v->first[n];
|
||||
if (result < 0) {
|
||||
std::ostringstream oss;
|
||||
oss << "Cannot resolve symbol: " << v->second[-result - 1] << std::endl;
|
||||
throw Exception(oss.str());
|
||||
}
|
||||
parsingStack.pop();
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t unionAdjust() {
|
||||
const Symbol& s = parsingStack.top();
|
||||
assertMatch(Symbol::Kind::UnionAdjust, s.kind());
|
||||
std::pair<size_t, ProductionPtr> p = s.extra<std::pair<size_t, ProductionPtr>>();
|
||||
parsingStack.pop();
|
||||
append(p.second);
|
||||
return p.first;
|
||||
}
|
||||
|
||||
std::string nameForIndex(size_t e) {
|
||||
const Symbol& s = parsingStack.top();
|
||||
assertMatch(Symbol::Kind::NameList, s.kind());
|
||||
const std::vector<std::string> names = s.extra<std::vector<std::string>>();
|
||||
if (e >= names.size()) {
|
||||
throw Exception("Not that many names");
|
||||
}
|
||||
std::string result = names[e];
|
||||
parsingStack.pop();
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t indexForName(const std::string& name) {
|
||||
const Symbol& s = parsingStack.top();
|
||||
assertMatch(Symbol::Kind::NameList, s.kind());
|
||||
const std::vector<std::string> names = s.extra<std::vector<std::string>>();
|
||||
auto it = std::find(names.begin(), names.end(), name);
|
||||
if (it == names.end()) {
|
||||
throw Exception("No such enum symbol");
|
||||
}
|
||||
size_t result = it - names.begin();
|
||||
parsingStack.pop();
|
||||
return result;
|
||||
}
|
||||
|
||||
void pushRepeatCount(size_t n) {
|
||||
processImplicitActions();
|
||||
Symbol& s = parsingStack.top();
|
||||
assertMatch(Symbol::Kind::Repeater, s.kind());
|
||||
auto* p = s.extrap<RepeaterInfo>();
|
||||
std::stack<ssize_t>& nn = boost::tuples::get<0>(*p);
|
||||
nn.push(n);
|
||||
}
|
||||
|
||||
void nextRepeatCount(size_t n) {
|
||||
processImplicitActions();
|
||||
Symbol& s = parsingStack.top();
|
||||
assertMatch(Symbol::Kind::Repeater, s.kind());
|
||||
auto* p = s.extrap<RepeaterInfo>();
|
||||
std::stack<ssize_t>& nn = boost::tuples::get<0>(*p);
|
||||
if (nn.empty() || nn.top() != 0) {
|
||||
throw Exception("Wrong number of items");
|
||||
}
|
||||
nn.top() = n;
|
||||
}
|
||||
|
||||
void popRepeater() {
|
||||
processImplicitActions();
|
||||
Symbol& s = parsingStack.top();
|
||||
assertMatch(Symbol::Kind::Repeater, s.kind());
|
||||
auto* p = s.extrap<RepeaterInfo>();
|
||||
std::stack<ssize_t>& ns = boost::tuples::get<0>(*p);
|
||||
if (ns.empty()) {
|
||||
throw Exception("Incorrect number of items (empty)");
|
||||
}
|
||||
if (ns.top() > 0) {
|
||||
throw Exception("Incorrect number of items (non-zero)");
|
||||
}
|
||||
ns.pop();
|
||||
parsingStack.pop();
|
||||
}
|
||||
|
||||
void selectBranch(size_t n) {
|
||||
const Symbol& s = parsingStack.top();
|
||||
assertMatch(Symbol::Kind::Alternative, s.kind());
|
||||
std::vector<ProductionPtr> v = s.extra<std::vector<ProductionPtr>>();
|
||||
if (n >= v.size()) {
|
||||
throw Exception("Not that many branches");
|
||||
}
|
||||
parsingStack.pop();
|
||||
append(v[n]);
|
||||
}
|
||||
|
||||
const std::vector<size_t>& sizeList() {
|
||||
const Symbol& s = parsingStack.top();
|
||||
assertMatch(Symbol::Kind::SizeList, s.kind());
|
||||
return *s.extrap<std::vector<size_t>>();
|
||||
}
|
||||
|
||||
/** Returns the kind of the symbol on top of the parsing stack. */
Symbol::Kind top() const {
    const Symbol& head = parsingStack.top();
    return head.kind();
}
|
||||
|
||||
/** Removes the symbol on top of the parsing stack without inspecting it. */
void pop() {
    parsingStack.pop();
}
|
||||
|
||||
void processImplicitActions() {
|
||||
for (;;) {
|
||||
Symbol& s = parsingStack.top();
|
||||
if (s.isImplicitAction()) {
|
||||
handler_.handle(s);
|
||||
parsingStack.pop();
|
||||
} else if (s.kind() == Symbol::Kind::SkipStart) {
|
||||
parsingStack.pop();
|
||||
skip(*decoder_);
|
||||
} else if (s.kind() == Symbol::Kind::Indirect) {
|
||||
ProductionPtr pp = s.extra<ProductionPtr>();
|
||||
parsingStack.pop();
|
||||
append(pp);
|
||||
} else if (s.kind() == Symbol::Kind::Symbolic) {
|
||||
ProductionPtr pp(s.extra<std::weak_ptr<Production>>());
|
||||
parsingStack.pop();
|
||||
append(pp);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Creates a parser whose stack initially holds only `s`. The decoder pointer
 * and handler reference are stored, not copied, so both must outlive the
 * parser.
 */
SimpleParser(const Symbol& s, Decoder* d, Handler& h)
    : decoder_(d), handler_(h) {
    parsingStack.push(s);
}
|
||||
|
||||
void reset() {
|
||||
while (parsingStack.size() > 1) {
|
||||
parsingStack.pop();
|
||||
}
|
||||
Symbol& s = parsingStack.top();
|
||||
append(boost::tuples::get<0>(*s.extrap<RootInfo>()));
|
||||
}
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, const Symbol& s);
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, const Production& p) {
|
||||
os << '(';
|
||||
for (const auto& it : p) {
|
||||
os << it << ", ";
|
||||
}
|
||||
os << ')';
|
||||
return os;
|
||||
}
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, const Symbol& s) {
|
||||
switch (s.kind()) {
|
||||
case Symbol::Kind::Repeater: {
|
||||
const RepeaterInfo& ri = *s.extrap<RepeaterInfo>();
|
||||
os << '(' << Symbol::toString(s.kind()) << ' ' << *boost::tuples::get<2>(ri) << ' '
|
||||
<< *boost::tuples::get<3>(ri) << ')';
|
||||
} break;
|
||||
case Symbol::Kind::Indirect: {
|
||||
os << '(' << Symbol::toString(s.kind()) << ' '
|
||||
<< *s.extra<std::shared_ptr<Production>>() << ')';
|
||||
} break;
|
||||
case Symbol::Kind::Alternative: {
|
||||
os << '(' << Symbol::toString(s.kind());
|
||||
for (const auto& it : *s.extrap<std::vector<ProductionPtr>>()) {
|
||||
os << ' ' << *it;
|
||||
}
|
||||
os << ')';
|
||||
} break;
|
||||
case Symbol::Kind::Symbolic: {
|
||||
os << '(' << Symbol::toString(s.kind()) << ' '
|
||||
<< s.extra<std::weak_ptr<Production>>().lock() << ')';
|
||||
} break;
|
||||
default:
|
||||
os << Symbol::toString(s.kind());
|
||||
break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
} // namespace parsing
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,535 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ValidatingCodec.hh"
|
||||
|
||||
#include <algorithm>
|
||||
#include <boost/any.hpp>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "Decoder.hh"
|
||||
#include "Encoder.hh"
|
||||
#include "NodeImpl.hh"
|
||||
#include "ValidSchema.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
using std::make_shared;
|
||||
|
||||
namespace parsing {
|
||||
|
||||
using std::shared_ptr;
|
||||
using std::static_pointer_cast;
|
||||
|
||||
using std::map;
|
||||
using std::ostringstream;
|
||||
using std::pair;
|
||||
using std::reverse;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
/** Follows the design of Avro Parser in Java. */
|
||||
/// Builds the production for the schema rooted at \p n: doGenerate() fills a
/// fresh node-to-production map, then fixup() is run on the result with that
/// same map.
ProductionPtr ValidatingGrammarGenerator::generate(const NodePtr &n) {
    map<NodePtr, ProductionPtr> productions;
    ProductionPtr root = doGenerate(n, productions);
    fixup(root, productions);
    return root;
}
|
||||
|
||||
/// Generates the grammar for \p schema and wraps it in a root symbol.
Symbol ValidatingGrammarGenerator::generate(const ValidSchema &schema) {
    // Kept as a named (non-temporary) object, exactly as rootSymbol() was
    // called before.
    ProductionPtr start = generate(schema.root());
    return Symbol::rootSymbol(start);
}
|
||||
|
||||
/**
 * Recursively builds the production for schema node \p n.
 *
 * Composite productions are assembled back to front: for example an array
 * pushes its end symbol first and its start symbol last.
 *
 * \param n schema node to translate.
 * \param m node-to-production map shared across the recursion. Fixed, record
 *          and enum nodes register their production in it; a symbolic
 *          reference whose target has no production yet stores an empty entry
 *          for the target and yields a placeholder symbol.
 * \return the production generated for \p n.
 * \throw Exception if the node type is not handled.
 */
ProductionPtr ValidatingGrammarGenerator::doGenerate(const NodePtr &n,
                                                     map<NodePtr, ProductionPtr> &m) {
    switch (n->type()) {
        case AVRO_NULL:
            return make_shared<Production>(1, Symbol::nullSymbol());
        case AVRO_BOOL:
            return make_shared<Production>(1, Symbol::boolSymbol());
        case AVRO_INT:
            return make_shared<Production>(1, Symbol::intSymbol());
        case AVRO_LONG:
            return make_shared<Production>(1, Symbol::longSymbol());
        case AVRO_FLOAT:
            return make_shared<Production>(1, Symbol::floatSymbol());
        case AVRO_DOUBLE:
            return make_shared<Production>(1, Symbol::doubleSymbol());
        case AVRO_STRING:
            return make_shared<Production>(1, Symbol::stringSymbol());
        case AVRO_BYTES:
            return make_shared<Production>(1, Symbol::bytesSymbol());
        case AVRO_FIXED: {
            ProductionPtr result = make_shared<Production>();
            result->push_back(Symbol::sizeCheckSymbol(n->fixedSize()));
            result->push_back(Symbol::fixedSymbol());
            m[n] = result;
            return result;
        }
        case AVRO_RECORD: {
            ProductionPtr result = make_shared<Production>();

            // Forget any existing entry for this record until its production
            // is complete.
            m.erase(n);
            size_t c = n->leaves();
            for (size_t i = 0; i < c; ++i) {
                const NodePtr &leaf = n->leafAt(i);
                ProductionPtr v = doGenerate(leaf, m);
                // Append each field reversed; the final reverse() below then
                // leaves the field productions intact but in reverse field order.
                std::copy(v->rbegin(), v->rend(), std::back_inserter(*result));
            }
            reverse(result->begin(), result->end());

            m[n] = result;
            return make_shared<Production>(1, Symbol::indirect(result));
        }
        case AVRO_ENUM: {
            ProductionPtr result = make_shared<Production>();
            result->push_back(Symbol::sizeCheckSymbol(n->names()));
            result->push_back(Symbol::enumSymbol());
            m[n] = result;
            return result;
        }
        case AVRO_ARRAY: {
            ProductionPtr result = make_shared<Production>();
            result->push_back(Symbol::arrayEndSymbol());
            result->push_back(Symbol::repeater(doGenerate(n->leafAt(0), m), true));
            result->push_back(Symbol::arrayStartSymbol());
            return result;
        }
        case AVRO_MAP: {
            ProductionPtr pp = doGenerate(n->leafAt(1), m);
            // Work on a copy so that appending the key symbol does not alter
            // the value node's own production (which may be registered in m).
            ProductionPtr v = make_shared<Production>(*pp);
            v->push_back(Symbol::stringSymbol());
            ProductionPtr result = make_shared<Production>();
            result->push_back(Symbol::mapEndSymbol());
            result->push_back(Symbol::repeater(v, false));
            result->push_back(Symbol::mapStartSymbol());
            return result;
        }
        case AVRO_UNION: {
            vector<ProductionPtr> vv;
            size_t c = n->leaves();
            vv.reserve(c);
            for (size_t i = 0; i < c; ++i) {
                vv.push_back(doGenerate(n->leafAt(i), m));
            }
            ProductionPtr result = make_shared<Production>();
            result->push_back(Symbol::alternative(vv));
            result->push_back(Symbol::unionSymbol());
            return result;
        }
        case AVRO_SYMBOLIC: {
            shared_ptr<NodeSymbolic> ns = static_pointer_cast<NodeSymbolic>(n);
            NodePtr nn = ns->getNode();
            auto it = m.find(nn);
            if (it != m.end() && it->second) {
                // The referenced node already has a production: reuse it.
                return it->second;
            } else {
                // Record an empty entry for the target and emit a placeholder
                // symbol in its place.
                m[nn] = ProductionPtr();
                return make_shared<Production>(1, Symbol::placeholder(nn));
            }
        }
        default:
            throw Exception("Unknown node type");
    }
}
|
||||
|
||||
struct DummyHandler {
|
||||
static size_t handle(const Symbol &) {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename P>
|
||||
class ValidatingDecoder : public Decoder {
|
||||
const shared_ptr<Decoder> base;
|
||||
DummyHandler handler_;
|
||||
P parser;
|
||||
|
||||
void init(InputStream &is) final;
|
||||
void decodeNull() final;
|
||||
bool decodeBool() final;
|
||||
int32_t decodeInt() final;
|
||||
int64_t decodeLong() final;
|
||||
float decodeFloat() final;
|
||||
double decodeDouble() final;
|
||||
void decodeString(string &value) final;
|
||||
void skipString() final;
|
||||
void decodeBytes(vector<uint8_t> &value) final;
|
||||
void skipBytes() final;
|
||||
void decodeFixed(size_t n, vector<uint8_t> &value) final;
|
||||
void skipFixed(size_t n) final;
|
||||
size_t decodeEnum() final;
|
||||
size_t arrayStart() final;
|
||||
size_t arrayNext() final;
|
||||
size_t skipArray() final;
|
||||
size_t mapStart() final;
|
||||
size_t mapNext() final;
|
||||
size_t skipMap() final;
|
||||
size_t decodeUnionIndex() final;
|
||||
void drain() final {
|
||||
base->drain();
|
||||
}
|
||||
|
||||
public:
|
||||
ValidatingDecoder(const ValidSchema &s, const shared_ptr<Decoder> &b) : base(b),
|
||||
parser(ValidatingGrammarGenerator().generate(s), NULL, handler_) {}
|
||||
};
|
||||
|
||||
template<typename P>
|
||||
void ValidatingDecoder<P>::init(InputStream &is) {
|
||||
base->init(is);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingDecoder<P>::decodeNull() {
|
||||
parser.advance(Symbol::Kind::Null);
|
||||
base->decodeNull();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
bool ValidatingDecoder<P>::decodeBool() {
|
||||
parser.advance(Symbol::Kind::Bool);
|
||||
return base->decodeBool();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
int32_t ValidatingDecoder<P>::decodeInt() {
|
||||
parser.advance(Symbol::Kind::Int);
|
||||
return base->decodeInt();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
int64_t ValidatingDecoder<P>::decodeLong() {
|
||||
parser.advance(Symbol::Kind::Long);
|
||||
return base->decodeLong();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
float ValidatingDecoder<P>::decodeFloat() {
|
||||
parser.advance(Symbol::Kind::Float);
|
||||
return base->decodeFloat();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
double ValidatingDecoder<P>::decodeDouble() {
|
||||
parser.advance(Symbol::Kind::Double);
|
||||
return base->decodeDouble();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingDecoder<P>::decodeString(string &value) {
|
||||
parser.advance(Symbol::Kind::String);
|
||||
base->decodeString(value);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingDecoder<P>::skipString() {
|
||||
parser.advance(Symbol::Kind::String);
|
||||
base->skipString();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingDecoder<P>::decodeBytes(vector<uint8_t> &value) {
|
||||
parser.advance(Symbol::Kind::Bytes);
|
||||
base->decodeBytes(value);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingDecoder<P>::skipBytes() {
|
||||
parser.advance(Symbol::Kind::Bytes);
|
||||
base->skipBytes();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingDecoder<P>::decodeFixed(size_t n, vector<uint8_t> &value) {
|
||||
parser.advance(Symbol::Kind::Fixed);
|
||||
parser.assertSize(n);
|
||||
base->decodeFixed(n, value);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingDecoder<P>::skipFixed(size_t n) {
|
||||
parser.advance(Symbol::Kind::Fixed);
|
||||
parser.assertSize(n);
|
||||
base->skipFixed(n);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ValidatingDecoder<P>::decodeEnum() {
|
||||
parser.advance(Symbol::Kind::Enum);
|
||||
size_t result = base->decodeEnum();
|
||||
parser.assertLessThanSize(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ValidatingDecoder<P>::arrayStart() {
|
||||
parser.advance(Symbol::Kind::ArrayStart);
|
||||
size_t result = base->arrayStart();
|
||||
parser.pushRepeatCount(result);
|
||||
if (result == 0) {
|
||||
parser.popRepeater();
|
||||
parser.advance(Symbol::Kind::ArrayEnd);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ValidatingDecoder<P>::arrayNext() {
|
||||
size_t result = base->arrayNext();
|
||||
parser.nextRepeatCount(result);
|
||||
if (result == 0) {
|
||||
parser.popRepeater();
|
||||
parser.advance(Symbol::Kind::ArrayEnd);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ValidatingDecoder<P>::skipArray() {
|
||||
parser.advance(Symbol::Kind::ArrayStart);
|
||||
size_t n = base->skipArray();
|
||||
if (n == 0) {
|
||||
parser.pop();
|
||||
} else {
|
||||
parser.pushRepeatCount(n);
|
||||
parser.skip(*base);
|
||||
}
|
||||
parser.advance(Symbol::Kind::ArrayEnd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ValidatingDecoder<P>::mapStart() {
|
||||
parser.advance(Symbol::Kind::MapStart);
|
||||
size_t result = base->mapStart();
|
||||
parser.pushRepeatCount(result);
|
||||
if (result == 0) {
|
||||
parser.popRepeater();
|
||||
parser.advance(Symbol::Kind::MapEnd);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ValidatingDecoder<P>::mapNext() {
|
||||
size_t result = base->mapNext();
|
||||
parser.nextRepeatCount(result);
|
||||
if (result == 0) {
|
||||
parser.popRepeater();
|
||||
parser.advance(Symbol::Kind::MapEnd);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ValidatingDecoder<P>::skipMap() {
|
||||
parser.advance(Symbol::Kind::MapStart);
|
||||
size_t n = base->skipMap();
|
||||
if (n == 0) {
|
||||
parser.pop();
|
||||
} else {
|
||||
parser.pushRepeatCount(n);
|
||||
parser.skip(*base);
|
||||
}
|
||||
parser.advance(Symbol::Kind::MapEnd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
size_t ValidatingDecoder<P>::decodeUnionIndex() {
|
||||
parser.advance(Symbol::Kind::Union);
|
||||
size_t result = base->decodeUnionIndex();
|
||||
parser.selectBranch(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
class ValidatingEncoder : public Encoder {
|
||||
DummyHandler handler_;
|
||||
P parser_;
|
||||
EncoderPtr base_;
|
||||
|
||||
void init(OutputStream &os) final;
|
||||
void flush() final;
|
||||
int64_t byteCount() const final;
|
||||
void encodeNull() final;
|
||||
void encodeBool(bool b) final;
|
||||
void encodeInt(int32_t i) final;
|
||||
void encodeLong(int64_t l) final;
|
||||
void encodeFloat(float f) final;
|
||||
void encodeDouble(double d) final;
|
||||
void encodeString(const std::string &s) final;
|
||||
void encodeBytes(const uint8_t *bytes, size_t len) final;
|
||||
void encodeFixed(const uint8_t *bytes, size_t len) final;
|
||||
void encodeEnum(size_t e) final;
|
||||
void arrayStart() final;
|
||||
void arrayEnd() final;
|
||||
void mapStart() final;
|
||||
void mapEnd() final;
|
||||
void setItemCount(size_t count) final;
|
||||
void startItem() final;
|
||||
void encodeUnionIndex(size_t e) final;
|
||||
|
||||
public:
|
||||
ValidatingEncoder(const ValidSchema &schema, EncoderPtr base) : parser_(ValidatingGrammarGenerator().generate(schema), NULL, handler_),
|
||||
base_(std::move(base)) {}
|
||||
};
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::init(OutputStream &os) {
|
||||
base_->init(os);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::flush() {
|
||||
base_->flush();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::encodeNull() {
|
||||
parser_.advance(Symbol::Kind::Null);
|
||||
base_->encodeNull();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::encodeBool(bool b) {
|
||||
parser_.advance(Symbol::Kind::Bool);
|
||||
base_->encodeBool(b);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::encodeInt(int32_t i) {
|
||||
parser_.advance(Symbol::Kind::Int);
|
||||
base_->encodeInt(i);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::encodeLong(int64_t l) {
|
||||
parser_.advance(Symbol::Kind::Long);
|
||||
base_->encodeLong(l);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::encodeFloat(float f) {
|
||||
parser_.advance(Symbol::Kind::Float);
|
||||
base_->encodeFloat(f);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::encodeDouble(double d) {
|
||||
parser_.advance(Symbol::Kind::Double);
|
||||
base_->encodeDouble(d);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::encodeString(const std::string &s) {
|
||||
parser_.advance(Symbol::Kind::String);
|
||||
base_->encodeString(s);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::encodeBytes(const uint8_t *bytes, size_t len) {
|
||||
parser_.advance(Symbol::Kind::Bytes);
|
||||
base_->encodeBytes(bytes, len);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::encodeFixed(const uint8_t *bytes, size_t len) {
|
||||
parser_.advance(Symbol::Kind::Fixed);
|
||||
parser_.assertSize(len);
|
||||
base_->encodeFixed(bytes, len);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::encodeEnum(size_t e) {
|
||||
parser_.advance(Symbol::Kind::Enum);
|
||||
parser_.assertLessThanSize(e);
|
||||
base_->encodeEnum(e);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::arrayStart() {
|
||||
parser_.advance(Symbol::Kind::ArrayStart);
|
||||
parser_.pushRepeatCount(0);
|
||||
base_->arrayStart();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::arrayEnd() {
|
||||
parser_.popRepeater();
|
||||
parser_.advance(Symbol::Kind::ArrayEnd);
|
||||
base_->arrayEnd();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::mapStart() {
|
||||
parser_.advance(Symbol::Kind::MapStart);
|
||||
parser_.pushRepeatCount(0);
|
||||
base_->mapStart();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::mapEnd() {
|
||||
parser_.popRepeater();
|
||||
parser_.advance(Symbol::Kind::MapEnd);
|
||||
base_->mapEnd();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::setItemCount(size_t count) {
|
||||
parser_.nextRepeatCount(count);
|
||||
base_->setItemCount(count);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::startItem() {
|
||||
parser_.processImplicitActions();
|
||||
if (parser_.top() != Symbol::Kind::Repeater) {
|
||||
throw Exception("startItem at not an item boundary");
|
||||
}
|
||||
base_->startItem();
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
void ValidatingEncoder<P>::encodeUnionIndex(size_t e) {
|
||||
parser_.advance(Symbol::Kind::Union);
|
||||
parser_.selectBranch(e);
|
||||
base_->encodeUnionIndex(e);
|
||||
}
|
||||
|
||||
template<typename P>
|
||||
int64_t ValidatingEncoder<P>::byteCount() const {
|
||||
return base_->byteCount();
|
||||
}
|
||||
|
||||
} // namespace parsing
|
||||
|
||||
/// Creates a validating decoder for schema \p s that forwards to \p base,
/// using a SimpleParser with the no-op DummyHandler.
DecoderPtr validatingDecoder(const ValidSchema &s,
                             const DecoderPtr &base) {
    using Parser = parsing::SimpleParser<parsing::DummyHandler>;
    using Impl = parsing::ValidatingDecoder<Parser>;
    return make_shared<Impl>(s, base);
}
|
||||
|
||||
/// Creates a validating encoder for \p schema that forwards to \p base,
/// using a SimpleParser with the no-op DummyHandler.
EncoderPtr validatingEncoder(const ValidSchema &schema, const EncoderPtr &base) {
    using Parser = parsing::SimpleParser<parsing::DummyHandler>;
    using Impl = parsing::ValidatingEncoder<Parser>;
    return make_shared<Impl>(schema, base);
}
|
||||
|
||||
} // namespace avro
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_parsing_ValidatingCodec_hh__
|
||||
#define avro_parsing_ValidatingCodec_hh__
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "NodeImpl.hh"
|
||||
#include "Symbol.hh"
|
||||
#include "ValidSchema.hh"
|
||||
|
||||
namespace avro {
|
||||
namespace parsing {
|
||||
|
||||
class ValidatingGrammarGenerator {
|
||||
protected:
|
||||
virtual ProductionPtr doGenerate(const NodePtr &n,
|
||||
std::map<NodePtr, ProductionPtr> &m);
|
||||
|
||||
ProductionPtr generate(const NodePtr &schema);
|
||||
|
||||
public:
|
||||
Symbol generate(const ValidSchema &schema);
|
||||
};
|
||||
|
||||
} // namespace parsing
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_AvroParse_hh__
|
||||
#define avro_AvroParse_hh__
|
||||
|
||||
#include "AvroTraits.hh"
|
||||
#include "Config.hh"
|
||||
#include "ResolvingReader.hh"
|
||||
|
||||
/// \file
|
||||
///
|
||||
/// Standalone parse functions for Avro types.
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// The main parse entry point function. Takes a parser (either validating or
|
||||
/// plain) and the object that should receive the parsed data.
|
||||
|
||||
template<typename Reader, typename T>
|
||||
void parse(Reader &p, T &val) {
|
||||
parse(p, val, is_serializable<T>());
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void parse(ResolvingReader &p, T &val) {
|
||||
translatingParse(p, val, is_serializable<T>());
|
||||
}
|
||||
|
||||
/// The type trait is_serializable must be true for the type; otherwise these overloads force the compiler to complain.
|
||||
|
||||
/// Selected when the std::false_type tag is passed: instantiating this
/// overload fails to compile with "Not a valid type to parse".
template<typename Reader, typename T>
void parse(Reader &p, T &val, const std::false_type &) {
    // sizeof(T) is never 0, so the assertion fires only on instantiation.
    static_assert(sizeof(T) == 0, "Not a valid type to parse");
}
|
||||
|
||||
/// Selected when the std::false_type tag is passed: instantiating this
/// overload fails to compile with "Not a valid type to parse".
template<typename Reader, typename T>
void translatingParse(Reader &p, T &val, const std::false_type &) {
    // sizeof(T) is never 0, so the assertion fires only on instantiation.
    static_assert(sizeof(T) == 0, "Not a valid type to parse");
}
|
||||
|
||||
// @{
|
||||
|
||||
/// The remainder of the file includes default implementations for serializable types.
|
||||
|
||||
/// Default implementation for the std::true_type tag: reads \p val with the
/// reader's readValue().
template<typename Reader, typename T>
void parse(Reader &p, T &val, const std::true_type &) { p.readValue(val); }
|
||||
|
||||
/// Byte-vector overload for the std::true_type tag: fills \p val with the
/// reader's readBytes().
template<typename Reader>
void parse(Reader &p, std::vector<uint8_t> &val, const std::true_type &) { p.readBytes(val); }
|
||||
|
||||
template<typename T>
|
||||
void translatingParse(ResolvingReader &p, T &val, const std::true_type &) {
|
||||
p.parse(val);
|
||||
}
|
||||
|
||||
// @}
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_AvroSerialize_hh__
|
||||
#define avro_AvroSerialize_hh__
|
||||
|
||||
#include "AvroTraits.hh"
|
||||
#include "Config.hh"
|
||||
|
||||
/// \file
|
||||
///
|
||||
/// Standalone serialize functions for Avro types.
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// The main serializer entry point function. Takes a serializer (either validating or
|
||||
/// plain) and the object that should be serialized.
|
||||
|
||||
template<typename Writer, typename T>
|
||||
void serialize(Writer &s, const T &val) {
|
||||
serialize(s, val, is_serializable<T>());
|
||||
}
|
||||
|
||||
/// The type trait is_serializable must be true for the type; otherwise this overload forces the compiler to complain.
|
||||
|
||||
/// Selected when the std::false_type tag is passed: instantiating this
/// overload fails to compile with "Not a valid type to serialize".
template<typename Writer, typename T>
void serialize(Writer &s, const T &val, const std::false_type &) {
    // sizeof(T) is never 0, so the assertion fires only on instantiation.
    static_assert(sizeof(T) == 0, "Not a valid type to serialize");
}
|
||||
|
||||
/// The remainder of the file includes default implementations for serializable types.
|
||||
|
||||
// @{
|
||||
|
||||
/// Default implementation for the std::true_type tag: writes \p val with the
/// writer's writeValue().
///
/// \p val is taken by const reference so that serializing a value (for
/// example a std::string) does not copy it first.
template<typename Writer, typename T>
void serialize(Writer &s, const T &val, const std::true_type &) {
    s.writeValue(val);
}
|
||||
|
||||
/// Byte-vector overload for the std::true_type tag: writes the vector's
/// contents with the writer's writeBytes().
template<typename Writer>
void serialize(Writer &s, const std::vector<uint8_t> &val, const std::true_type &) {
    const uint8_t *const first = val.data();
    s.writeBytes(first, val.size());
}
|
||||
|
||||
// @}
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,119 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_AvroTraits_hh__
|
||||
#define avro_AvroTraits_hh__
|
||||
|
||||
#include "Config.hh"
|
||||
#include "Types.hh"
|
||||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
|
||||
/** @file
|
||||
*
|
||||
* This header contains type traits and similar utilities used by the library.
|
||||
*/
|
||||
namespace avro {
|
||||
|
||||
/**
|
||||
* Define an is_serializable trait for types we can serialize natively.
|
||||
* New types will need to define the trait as well.
|
||||
*/
|
||||
/// Primary template: a type is not serializable unless a specialization
/// says otherwise.
template<typename T>
struct is_serializable : std::false_type {};
|
||||
|
||||
/// Primary template: a type is not promotable unless a specialization says
/// otherwise.
template<typename T>
struct is_promotable : std::false_type {};
|
||||
|
||||
template<typename T>
|
||||
struct type_to_avro {
|
||||
static const Type type = AVRO_NUM_TYPES;
|
||||
};
|
||||
|
||||
/**
|
||||
* Check if a \p T is a complete type i.e. it is defined as opposed to just
|
||||
* declared.
|
||||
*
|
||||
* is_defined<T>::value will be true or false depending on whether T is a
|
||||
* complete type or not respectively.
|
||||
*/
|
||||
template<class T>
struct is_defined {
    // Return types of distinct sizes, used to tell the overloads apart.
    using yes = char[1];
    using no = char[2];

    // Viable only when sizeof(U) is well-formed, i.e. U is complete.
    // Never actually called; it appears solely inside sizeof below.
    template<class U>
    static yes &test(char (*)[sizeof(U)]) { throw 0; }

    // Fallback chosen when the overload above is not viable.
    template<class U>
    static no &test(...) { throw 0; }

    static const bool value = (sizeof(yes) == sizeof(test<T>(0)));
};
|
||||
|
||||
/**
|
||||
* Similar to is_defined, but used to check if T is not defined.
|
||||
*
|
||||
* is_not_defined<T>::value will be true or false depending on whether T is an
|
||||
* incomplete type or not respectively.
|
||||
*/
|
||||
template<class T>
struct is_not_defined {
    // Return types of distinct sizes, used to tell the overloads apart.
    using yes = char[1];
    using no = char[2];

    // Viable only when sizeof(U) is well-formed, i.e. U is complete.
    // Never actually called; it appears solely inside sizeof below.
    template<class U>
    static yes &test(char (*)[sizeof(U)]) { throw 0; }

    // Fallback chosen when the overload above is not viable.
    template<class U>
    static no &test(...) { throw 0; }

    static const bool value = (sizeof(no) == sizeof(test<T>(0)));
};
|
||||
|
||||
/// Marks CTYPE as serializable and maps it to the Avro type AVROTYPE by
/// specializing is_serializable and type_to_avro.
#define DEFINE_PRIMITIVE(CTYPE, AVROTYPE)                     \
    template<>                                                \
    struct is_serializable<CTYPE> : public std::true_type {}; \
                                                              \
    template<>                                                \
    struct type_to_avro<CTYPE> {                              \
        static const Type type = AVROTYPE;                    \
    };
|
||||
|
||||
/// Same as DEFINE_PRIMITIVE, and additionally marks CTYPE as promotable by
/// specializing is_promotable.
#define DEFINE_PROMOTABLE_PRIMITIVE(CTYPE, AVROTYPE)        \
    template<>                                              \
    struct is_promotable<CTYPE> : public std::true_type {}; \
                                                            \
    DEFINE_PRIMITIVE(CTYPE, AVROTYPE)
|
||||
|
||||
DEFINE_PROMOTABLE_PRIMITIVE(int32_t, AVRO_INT)
|
||||
DEFINE_PROMOTABLE_PRIMITIVE(int64_t, AVRO_LONG)
|
||||
DEFINE_PROMOTABLE_PRIMITIVE(float, AVRO_FLOAT)
|
||||
DEFINE_PRIMITIVE(double, AVRO_DOUBLE)
|
||||
DEFINE_PRIMITIVE(bool, AVRO_BOOL)
|
||||
DEFINE_PRIMITIVE(Null, AVRO_NULL)
|
||||
DEFINE_PRIMITIVE(std::string, AVRO_STRING)
|
||||
DEFINE_PRIMITIVE(std::vector<uint8_t>, AVRO_BYTES)
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Compiler_hh__
|
||||
#define avro_Compiler_hh__
|
||||
|
||||
#include "Config.hh"
|
||||
#include <cstdint>
|
||||
#include <istream>
|
||||
|
||||
namespace avro {
|
||||
|
||||
class AVRO_DECL InputStream;
|
||||
|
||||
/// This class is used to implement an avro spec parser using a flex/bison
|
||||
/// compiler. In order for the lexer to be reentrant, this class provides a
|
||||
/// lexer object for each parse. The bison parser also uses this class to
|
||||
/// build up an avro parse tree as the avro spec is parsed.
|
||||
|
||||
class AVRO_DECL ValidSchema;
|
||||
|
||||
/// Given a stream containing a JSON schema, compiles the schema to a
|
||||
/// ValidSchema object. Throws if the schema cannot be compiled to a valid
|
||||
/// schema
|
||||
|
||||
AVRO_DECL void compileJsonSchema(std::istream &is, ValidSchema &schema);
|
||||
|
||||
/// Non-throwing version of compileJsonSchema.
|
||||
///
|
||||
/// \return True if no error, false if error (with the error string set)
|
||||
///
|
||||
|
||||
AVRO_DECL bool compileJsonSchema(std::istream &is, ValidSchema &schema,
|
||||
std::string &error);
|
||||
|
||||
AVRO_DECL ValidSchema compileJsonSchemaFromStream(InputStream &is);
|
||||
|
||||
AVRO_DECL ValidSchema compileJsonSchemaFromMemory(const uint8_t *input, size_t len);
|
||||
|
||||
AVRO_DECL ValidSchema compileJsonSchemaFromString(const char *input);
|
||||
|
||||
AVRO_DECL ValidSchema compileJsonSchemaFromString(const std::string &input);
|
||||
|
||||
AVRO_DECL ValidSchema compileJsonSchemaFromFile(const char *filename);
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Config_hh
|
||||
#define avro_Config_hh
|
||||
|
||||
// Windows DLL support
|
||||
|
||||
#ifdef _WIN32
|
||||
#pragma warning(disable : 4275 4251)
|
||||
|
||||
#if defined(AVRO_DYN_LINK)
|
||||
#ifdef AVRO_SOURCE
|
||||
#define AVRO_DECL __declspec(dllexport)
|
||||
#else
|
||||
#define AVRO_DECL __declspec(dllimport)
|
||||
#endif // AVRO_SOURCE
|
||||
#endif // AVRO_DYN_LINK
|
||||
|
||||
#include <intsafe.h>
|
||||
using ssize_t = SSIZE_T;
|
||||
#endif // _WIN32
|
||||
|
||||
#ifndef AVRO_DECL
|
||||
#define AVRO_DECL
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_CustomAttributes_hh__
|
||||
#define avro_CustomAttributes_hh__
|
||||
|
||||
#include "Config.hh"
|
||||
#include <boost/optional.hpp>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
namespace avro {
|
||||
|
||||
// CustomAttributes class stores avro custom attributes.
|
||||
// Each attribute is represented by a unique name and value.
|
||||
// User is supposed to create CustomAttributes object and then add it to Schema.
|
||||
class AVRO_DECL CustomAttributes {
|
||||
public:
|
||||
// Retrieves the custom attribute JSON entity for the given attribute name; returns an
|
||||
// empty optional if the attribute doesn't exist.
|
||||
boost::optional<std::string> getAttribute(const std::string &name) const;
|
||||
|
||||
// Adds a custom attribute. If the attribute already exists, throw an exception.
|
||||
void addAttribute(const std::string &name, const std::string &value);
|
||||
|
||||
// Provides a way to iterate over the custom attributes or check attribute size.
|
||||
const std::map<std::string, std::string> &attributes() const {
|
||||
return attributes_;
|
||||
}
|
||||
|
||||
// Prints the attribute value for the specific attribute.
|
||||
void printJson(std::ostream &os, const std::string &name) const;
|
||||
|
||||
private:
|
||||
std::map<std::string, std::string> attributes_;
|
||||
};
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,415 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_DataFile_hh__
|
||||
#define avro_DataFile_hh__
|
||||
|
||||
#include "Config.hh"
|
||||
#include "Encoder.hh"
|
||||
#include "Specific.hh"
|
||||
#include "Stream.hh"
|
||||
#include "ValidSchema.hh"
|
||||
#include "buffer/Buffer.hh"
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "array"
|
||||
#include "boost/utility.hpp"
|
||||
#include <boost/iostreams/filtering_stream.hpp>
|
||||
|
||||
namespace avro {
|
||||
|
||||
/** Specify type of compression to use when writing data files. */
enum Codec {
    NULL_CODEC,
    DEFLATE_CODEC,

// SNAPPY_CODEC only exists when the build defines SNAPPY_CODEC_AVAILABLE.
#ifdef SNAPPY_CODEC_AVAILABLE
    SNAPPY_CODEC
#endif

};

/** Number of bytes in a sync value (the element count of DataFileSync). */
constexpr int SyncSize = 16;

/**
 * The sync value.
 */
using DataFileSync = std::array<uint8_t, SyncSize>;
|
||||
|
||||
/**
|
||||
* Type-independent portion of DataFileWriter.
|
||||
* At any given point in time, at most one file can be written using
|
||||
* this object.
|
||||
*/
|
||||
class AVRO_DECL DataFileWriterBase : boost::noncopyable {
|
||||
const std::string filename_;
|
||||
const ValidSchema schema_;
|
||||
const EncoderPtr encoderPtr_;
|
||||
const size_t syncInterval_;
|
||||
Codec codec_;
|
||||
|
||||
std::unique_ptr<OutputStream> stream_;
|
||||
std::unique_ptr<OutputStream> buffer_;
|
||||
const DataFileSync sync_;
|
||||
int64_t objectCount_;
|
||||
|
||||
typedef std::map<std::string, std::vector<uint8_t>> Metadata;
|
||||
|
||||
Metadata metadata_;
|
||||
int64_t lastSync_;
|
||||
|
||||
static std::unique_ptr<OutputStream> makeStream(const char *filename);
|
||||
static DataFileSync makeSync();
|
||||
|
||||
void writeHeader();
|
||||
void setMetadata(const std::string &key, const std::string &value);
|
||||
|
||||
/**
|
||||
* Generates a sync marker in the file.
|
||||
*/
|
||||
void sync();
|
||||
|
||||
/**
|
||||
* Shared constructor portion, factored out into a common helper.
|
||||
*/
|
||||
void init(const ValidSchema &schema, size_t syncInterval, const Codec &codec);
|
||||
|
||||
public:
|
||||
/**
|
||||
* Returns the current encoder for this writer.
|
||||
*/
|
||||
Encoder &encoder() const { return *encoderPtr_; }
|
||||
|
||||
/**
|
||||
* Inserts a sync if the buffer has sufficient data for a sync to be
|
||||
* inserted.
|
||||
*/
|
||||
void syncIfNeeded();
|
||||
|
||||
/**
|
||||
* Returns the byte offset (within the current file) of the start of the current block being written.
|
||||
*/
|
||||
uint64_t getCurrentBlockStart() const;
|
||||
|
||||
/**
|
||||
* Increments the object count.
|
||||
*/
|
||||
void incr() {
|
||||
++objectCount_;
|
||||
}
|
||||
/**
|
||||
* Constructs a data file writer with the given sync interval and name.
|
||||
*/
|
||||
DataFileWriterBase(const char *filename, const ValidSchema &schema,
|
||||
size_t syncInterval, Codec codec = NULL_CODEC);
|
||||
DataFileWriterBase(std::unique_ptr<OutputStream> outputStream,
|
||||
const ValidSchema &schema, size_t syncInterval, Codec codec);
|
||||
|
||||
~DataFileWriterBase();
|
||||
/**
|
||||
* Closes the current file. Once closed this datafile object cannot be
|
||||
* used for writing any more.
|
||||
*/
|
||||
void close();
|
||||
|
||||
/**
|
||||
* Returns the schema for this data file.
|
||||
*/
|
||||
const ValidSchema &schema() const { return schema_; }
|
||||
|
||||
/**
|
||||
* Flushes any unwritten data into the file.
|
||||
*/
|
||||
void flush();
|
||||
};
|
||||
|
||||
/**
|
||||
* An Avro datafile that can store objects of type T.
|
||||
*/
|
||||
template<typename T>
|
||||
class DataFileWriter : boost::noncopyable {
|
||||
std::unique_ptr<DataFileWriterBase> base_;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Constructs a new data file.
|
||||
*/
|
||||
DataFileWriter(const char *filename, const ValidSchema &schema,
|
||||
size_t syncInterval = 16 * 1024, Codec codec = NULL_CODEC) : base_(new DataFileWriterBase(filename, schema, syncInterval, codec)) {}
|
||||
|
||||
DataFileWriter(std::unique_ptr<OutputStream> outputStream, const ValidSchema &schema,
|
||||
size_t syncInterval = 16 * 1024, Codec codec = NULL_CODEC) : base_(new DataFileWriterBase(std::move(outputStream), schema, syncInterval, codec)) {}
|
||||
|
||||
/**
|
||||
* Writes the given piece of data into the file.
|
||||
*/
|
||||
void write(const T &datum) {
|
||||
base_->syncIfNeeded();
|
||||
avro::encode(base_->encoder(), datum);
|
||||
base_->incr();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the byte offset (within the current file) of the start of the current block being written.
|
||||
*/
|
||||
uint64_t getCurrentBlockStart() { return base_->getCurrentBlockStart(); }
|
||||
|
||||
/**
|
||||
* Closes the current file. Once closed this datafile object cannot be
|
||||
* used for writing any more.
|
||||
*/
|
||||
void close() { base_->close(); }
|
||||
|
||||
/**
|
||||
* Returns the schema for this data file.
|
||||
*/
|
||||
const ValidSchema &schema() const { return base_->schema(); }
|
||||
|
||||
/**
|
||||
* Flushes any unwritten data into the file.
|
||||
*/
|
||||
void flush() { base_->flush(); }
|
||||
};
|
||||
|
||||
/**
|
||||
* The type independent portion of reader.
|
||||
*/
|
||||
class AVRO_DECL DataFileReaderBase : boost::noncopyable {
|
||||
const std::string filename_;
|
||||
const std::unique_ptr<InputStream> stream_;
|
||||
const DecoderPtr decoder_;
|
||||
int64_t objectCount_;
|
||||
bool eof_;
|
||||
Codec codec_;
|
||||
int64_t blockStart_{};
|
||||
int64_t blockEnd_{};
|
||||
|
||||
ValidSchema readerSchema_;
|
||||
ValidSchema dataSchema_;
|
||||
DecoderPtr dataDecoder_;
|
||||
std::unique_ptr<InputStream> dataStream_;
|
||||
typedef std::map<std::string, std::vector<uint8_t>> Metadata;
|
||||
|
||||
Metadata metadata_;
|
||||
DataFileSync sync_{};
|
||||
|
||||
// for compressed buffer
|
||||
std::unique_ptr<boost::iostreams::filtering_istream> os_;
|
||||
std::vector<char> compressed_;
|
||||
std::string uncompressed;
|
||||
void readHeader();
|
||||
|
||||
void readDataBlock();
|
||||
void doSeek(int64_t position);
|
||||
|
||||
public:
|
||||
/**
|
||||
* Returns the current decoder for this reader.
|
||||
*/
|
||||
Decoder &decoder() { return *dataDecoder_; }
|
||||
|
||||
/**
|
||||
* Returns true if and only if there is more to read.
|
||||
*/
|
||||
bool hasMore();
|
||||
|
||||
/**
|
||||
* Decrements the number of objects yet to read.
|
||||
*/
|
||||
void decr() { --objectCount_; }
|
||||
|
||||
/**
|
||||
* Constructs the reader for the given file and the reader is
|
||||
* expected to use the schema that is used with data.
|
||||
* This function should be called exactly once after constructing
|
||||
* the DataFileReaderBase object.
|
||||
*/
|
||||
explicit DataFileReaderBase(const char *filename);
|
||||
|
||||
explicit DataFileReaderBase(std::unique_ptr<InputStream> inputStream);
|
||||
|
||||
/**
|
||||
* Initializes the reader so that the reader and writer schemas
|
||||
* are the same.
|
||||
*/
|
||||
void init();
|
||||
|
||||
/**
|
||||
* Initializes the reader to read objects according to the given
|
||||
* schema. This gives an opportunity for the reader to see the schema
|
||||
* in the data file before deciding the right schema to use for reading.
|
||||
* This must be called exactly once after constructing the
|
||||
* DataFileReaderBase object.
|
||||
*/
|
||||
void init(const ValidSchema &readerSchema);
|
||||
|
||||
/**
|
||||
* Returns the schema for this object.
|
||||
*/
|
||||
const ValidSchema &readerSchema() { return readerSchema_; }
|
||||
|
||||
/**
|
||||
* Returns the schema stored with the data file.
|
||||
*/
|
||||
const ValidSchema &dataSchema() { return dataSchema_; }
|
||||
|
||||
/**
|
||||
* Closes the reader. No further operation is possible on this reader.
|
||||
*/
|
||||
void close();
|
||||
|
||||
/**
|
||||
* Move to a specific, known synchronization point, for example one returned
|
||||
* from previousSync().
|
||||
*/
|
||||
void seek(int64_t position);
|
||||
|
||||
/**
|
||||
* Move to the next synchronization point after a position. To process a
|
||||
* range of file entries, call this with the starting position, then check
|
||||
* pastSync() with the end point before each use of decoder().
|
||||
*/
|
||||
void sync(int64_t position);
|
||||
|
||||
/**
|
||||
* Return true if past the next synchronization point after a position.
|
||||
*/
|
||||
bool pastSync(int64_t position);
|
||||
|
||||
/**
|
||||
* Return the last synchronization point before our current position.
|
||||
*/
|
||||
int64_t previousSync() const;
|
||||
};
|
||||
|
||||
/**
|
||||
* Reads the contents of data file one after another.
|
||||
*/
|
||||
template<typename T>
|
||||
class DataFileReader : boost::noncopyable {
|
||||
std::unique_ptr<DataFileReaderBase> base_;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Constructs the reader for the given file and the reader is
|
||||
* expected to use the given schema.
|
||||
*/
|
||||
DataFileReader(const char *filename, const ValidSchema &readerSchema) : base_(new DataFileReaderBase(filename)) {
|
||||
base_->init(readerSchema);
|
||||
}
|
||||
|
||||
DataFileReader(std::unique_ptr<InputStream> inputStream, const ValidSchema &readerSchema) : base_(new DataFileReaderBase(std::move(inputStream))) {
|
||||
base_->init(readerSchema);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs the reader for the given file and the reader is
|
||||
* expected to use the schema that is used with data.
|
||||
*/
|
||||
explicit DataFileReader(const char *filename) : base_(new DataFileReaderBase(filename)) {
|
||||
base_->init();
|
||||
}
|
||||
|
||||
explicit DataFileReader(std::unique_ptr<InputStream> inputStream) : base_(new DataFileReaderBase(std::move(inputStream))) {
|
||||
base_->init();
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a reader using the reader base. This form of constructor
|
||||
* allows the user to examine the schema of a given file and then
|
||||
* decide to use the right type of data to be deserialize. Without this
|
||||
* the user must know the type of data for the template _before_
|
||||
* he knows the schema within the file.
|
||||
* The schema present in the data file will be used for reading
|
||||
* from this reader.
|
||||
*/
|
||||
explicit DataFileReader(std::unique_ptr<DataFileReaderBase> base) : base_(std::move(base)) {
|
||||
base_->init();
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a reader using the reader base. This form of constructor
|
||||
* allows the user to examine the schema of a given file and then
|
||||
* decide to use the right type of data to be deserialize. Without this
|
||||
* the user must know the type of data for the template _before_
|
||||
* he knows the schema within the file.
|
||||
* The argument readerSchema will be used for reading
|
||||
* from this reader.
|
||||
*/
|
||||
DataFileReader(std::unique_ptr<DataFileReaderBase> base,
|
||||
const ValidSchema &readerSchema) : base_(std::move(base)) {
|
||||
base_->init(readerSchema);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the next entry from the data file.
|
||||
* \return true if an object has been successfully read into \p datum and
|
||||
* false if there are no more entries in the file.
|
||||
*/
|
||||
bool read(T &datum) {
|
||||
if (base_->hasMore()) {
|
||||
base_->decr();
|
||||
avro::decode(base_->decoder(), datum);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the schema for this object.
|
||||
*/
|
||||
const ValidSchema &readerSchema() { return base_->readerSchema(); }
|
||||
|
||||
/**
|
||||
* Returns the schema stored with the data file.
|
||||
*/
|
||||
const ValidSchema &dataSchema() { return base_->dataSchema(); }
|
||||
|
||||
/**
|
||||
* Closes the reader. No further operation is possible on this reader.
|
||||
*/
|
||||
void close() { return base_->close(); }
|
||||
|
||||
/**
|
||||
* Move to a specific, known synchronization point, for example one returned
|
||||
* from previousSync().
|
||||
*/
|
||||
void seek(int64_t position) { base_->seek(position); }
|
||||
|
||||
/**
|
||||
* Move to the next synchronization point after a position. To process a
|
||||
* range of file entries, call this with the starting position, then check
|
||||
* pastSync() with the end point before each call to read().
|
||||
*/
|
||||
void sync(int64_t position) { base_->sync(position); }
|
||||
|
||||
/**
|
||||
* Return true if past the next synchronization point after a position.
|
||||
*/
|
||||
bool pastSync(int64_t position) { return base_->pastSync(position); }
|
||||
|
||||
/**
|
||||
* Return the last synchronization point before our current position.
|
||||
*/
|
||||
int64_t previousSync() { return base_->previousSync(); }
|
||||
};
|
||||
|
||||
} // namespace avro
|
||||
#endif
|
||||
|
|
@ -0,0 +1,225 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Decoder_hh__
|
||||
#define avro_Decoder_hh__
|
||||
|
||||
#include "Config.hh"
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Stream.hh"
|
||||
#include "ValidSchema.hh"
|
||||
|
||||
/// \file
|
||||
///
|
||||
/// Low level support for decoding avro values.
|
||||
/// This class has two types of functions. One type of functions support
|
||||
/// decoding of leaf values (for example, decodeLong and
|
||||
/// decodeString). These functions have analogs in Encoder.
|
||||
///
|
||||
/// The other type of functions support decoding of maps and arrays.
|
||||
/// These functions are arrayStart, startItem, and arrayEnd
|
||||
/// (and similar functions for maps).
|
||||
|
||||
namespace avro {
|
||||
|
||||
/**
|
||||
* Decoder is an interface implemented by every decoder capable
|
||||
* of decoding Avro data.
|
||||
*/
|
||||
class AVRO_DECL Decoder {
|
||||
public:
|
||||
virtual ~Decoder() = default;
|
||||
/// All future decoding will come from is, which should be valid
|
||||
/// until replaced by another call to init() or this Decoder is
|
||||
/// destructed.
|
||||
virtual void init(InputStream &is) = 0;
|
||||
|
||||
/// Decodes a null from the current stream.
|
||||
virtual void decodeNull() = 0;
|
||||
|
||||
/// Decodes a bool from the current stream
|
||||
virtual bool decodeBool() = 0;
|
||||
|
||||
/// Decodes a 32-bit int from the current stream.
|
||||
virtual int32_t decodeInt() = 0;
|
||||
|
||||
/// Decodes a 64-bit signed int from the current stream.
|
||||
virtual int64_t decodeLong() = 0;
|
||||
|
||||
/// Decodes a single-precision floating point number from current stream.
|
||||
virtual float decodeFloat() = 0;
|
||||
|
||||
/// Decodes a double-precision floating point number from current stream.
|
||||
virtual double decodeDouble() = 0;
|
||||
|
||||
/// Decodes a UTF-8 string from the current stream.
|
||||
std::string decodeString() {
|
||||
std::string result;
|
||||
decodeString(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a UTF-8 string from the stream and assigns it to value.
|
||||
*/
|
||||
virtual void decodeString(std::string &value) = 0;
|
||||
|
||||
/// Skips a string on the current stream.
|
||||
virtual void skipString() = 0;
|
||||
|
||||
/// Decodes arbitrary binary data from the current stream.
|
||||
std::vector<uint8_t> decodeBytes() {
|
||||
std::vector<uint8_t> result;
|
||||
decodeBytes(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Decodes arbitrary binary data from the current stream and puts it
|
||||
/// in value.
|
||||
virtual void decodeBytes(std::vector<uint8_t> &value) = 0;
|
||||
|
||||
/// Skips bytes on the current stream.
|
||||
virtual void skipBytes() = 0;
|
||||
|
||||
/**
|
||||
* Decodes fixed length binary from the current stream.
|
||||
* \param[in] n The size (byte count) of the fixed being read.
|
||||
* \return The fixed data that has been read. The size of the returned
|
||||
* vector is guaranteed to be equal to \p n.
|
||||
*/
|
||||
std::vector<uint8_t> decodeFixed(size_t n) {
|
||||
std::vector<uint8_t> result;
|
||||
decodeFixed(n, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a fixed from the current stream.
|
||||
* \param[in] n The size (byte count) of the fixed being read.
|
||||
* \param[out] value The value that receives the fixed. The vector will
|
||||
* be size-adjusted based on the fixed schema's size.
|
||||
*/
|
||||
virtual void decodeFixed(size_t n, std::vector<uint8_t> &value) = 0;
|
||||
|
||||
/// Skips fixed length binary on the current stream.
|
||||
virtual void skipFixed(size_t n) = 0;
|
||||
|
||||
/// Decodes enum from the current stream.
|
||||
virtual size_t decodeEnum() = 0;
|
||||
|
||||
/// Start decoding an array. Returns the number of entries in first chunk.
|
||||
virtual size_t arrayStart() = 0;
|
||||
|
||||
/// Returns the number of entries in next chunk. 0 if last.
|
||||
virtual size_t arrayNext() = 0;
|
||||
|
||||
/// Tries to skip an array. If it can, it returns 0. Otherwise
|
||||
/// it returns the number of elements to be skipped. The client
|
||||
/// should skip the individual items. In such cases, skipArray
|
||||
/// is identical to arrayStart.
|
||||
virtual size_t skipArray() = 0;
|
||||
|
||||
/// Start decoding a map. Returns the number of entries in first chunk.
|
||||
virtual size_t mapStart() = 0;
|
||||
|
||||
/// Returns the number of entries in next chunk. 0 if last.
|
||||
virtual size_t mapNext() = 0;
|
||||
|
||||
/// Tries to skip a map. If it can, it returns 0. Otherwise
|
||||
/// it returns the number of elements to be skipped. The client
|
||||
/// should skip the individual items. In such cases, skipMap
|
||||
/// is identical to mapStart.
|
||||
virtual size_t skipMap() = 0;
|
||||
|
||||
/// Decodes a branch of a union. The actual value is to follow.
|
||||
virtual size_t decodeUnionIndex() = 0;
|
||||
|
||||
/// Drains any additional data at the end of the current entry in a stream.
|
||||
/// It also returns any unused bytes back to any underlying input stream.
|
||||
/// One situation this happens is when the reader's schema and
|
||||
/// the writer's schema are records but are different and the writer's
|
||||
/// record has more fields at the end of the record.
|
||||
/// Leaving such data unread is usually not a problem. If multiple
|
||||
/// records are stored consecutively in a stream (e.g. Avro data file)
|
||||
/// any attempt to read the next record will automatically skip
|
||||
/// those extra fields of the current record. It would still leave
|
||||
/// the extra fields at the end of the last record in the stream.
|
||||
/// This would mean that the stream is not in a good state. For example,
|
||||
/// if some non-avro information is stored at the end of the stream,
|
||||
/// the consumers of such data would see the bytes left behind
|
||||
/// by the avro decoder. Similar set of problems occur if the Decoder
|
||||
/// consumes more than what it should.
|
||||
virtual void drain() = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* Shared pointer to Decoder.
|
||||
*/
|
||||
using DecoderPtr = std::shared_ptr<Decoder>;
|
||||
|
||||
/**
|
||||
* ResolvingDecoder is derived from \ref Decoder, with an additional
|
||||
* function to obtain the field ordering of fields within a record.
|
||||
*/
|
||||
class AVRO_DECL ResolvingDecoder : public Decoder {
|
||||
public:
|
||||
/// Returns the order of fields for records.
|
||||
/// The order of fields could be different from their
|
||||
/// order in the schema because the writer's field order could
|
||||
/// be different. In order to avoid buffering and later use,
|
||||
/// we return the values in the writer's field order.
|
||||
virtual const std::vector<size_t> &fieldOrder() = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* Shared pointer to ResolvingDecoder.
|
||||
*/
|
||||
using ResolvingDecoderPtr = std::shared_ptr<ResolvingDecoder>;
|
||||
/**
|
||||
* Returns a decoder that can decode binary Avro standard.
|
||||
*/
|
||||
AVRO_DECL DecoderPtr binaryDecoder();
|
||||
|
||||
/**
|
||||
* Returns a decoder that validates sequence of calls to an underlying
|
||||
* Decoder against the given schema.
|
||||
*/
|
||||
AVRO_DECL DecoderPtr validatingDecoder(const ValidSchema &schema,
|
||||
const DecoderPtr &base);
|
||||
|
||||
/**
|
||||
* Returns a decoder that can decode Avro standard for JSON.
|
||||
*/
|
||||
AVRO_DECL DecoderPtr jsonDecoder(const ValidSchema &schema);
|
||||
|
||||
/**
|
||||
* Returns a decoder that decodes avro data from base written according to
|
||||
* writerSchema and resolves against readerSchema.
|
||||
* The client uses the decoder as if the data were written using readerSchema.
|
||||
* // FIXME: Handle out of order fields.
|
||||
*/
|
||||
AVRO_DECL ResolvingDecoderPtr resolvingDecoder(const ValidSchema &writer,
|
||||
const ValidSchema &reader, const DecoderPtr &base);
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,173 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Encoder_hh__
|
||||
#define avro_Encoder_hh__
|
||||
|
||||
#include "Config.hh"
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Stream.hh"
|
||||
#include "ValidSchema.hh"
|
||||
|
||||
/// \file
|
||||
///
|
||||
/// Low level support for encoding avro values.
|
||||
/// This class has two types of functions. One type of functions support
|
||||
/// the writing of leaf values (for example, encodeLong and
|
||||
/// encodeString). These functions have analogs in Decoder.
|
||||
///
|
||||
/// The other type of functions support the writing of maps and arrays.
|
||||
/// These functions are arrayStart, startItem, and arrayEnd
|
||||
/// (and similar functions for maps).
|
||||
/// Some implementations of Encoder handle the
|
||||
/// buffering required to break large maps and arrays into blocks,
|
||||
/// which is necessary for applications that want to do streaming.
|
||||
|
||||
namespace avro {
|
||||
|
||||
/**
|
||||
* The abstract base class for all Avro encoders. The implementations
|
||||
* differ in the method of encoding (binary versus JSON) or in capabilities
|
||||
* such as ability to verify the order of invocation of different functions.
|
||||
*/
|
||||
class AVRO_DECL Encoder {
|
||||
public:
|
||||
virtual ~Encoder() = default;
|
||||
/// All future encodings will go to os, which should be valid until
|
||||
/// it is reset with another call to init() or the encoder is
|
||||
/// destructed.
|
||||
virtual void init(OutputStream &os) = 0;
|
||||
|
||||
/// Flushes any data in internal buffers.
|
||||
virtual void flush() = 0;
|
||||
|
||||
/// Returns the number of bytes produced so far.
|
||||
/// For a meaningful value, do a flush() before invoking this function.
|
||||
virtual int64_t byteCount() const = 0;
|
||||
|
||||
/// Encodes a null to the current stream.
|
||||
virtual void encodeNull() = 0;
|
||||
|
||||
/// Encodes a bool to the current stream
|
||||
virtual void encodeBool(bool b) = 0;
|
||||
|
||||
/// Encodes a 32-bit int to the current stream.
|
||||
virtual void encodeInt(int32_t i) = 0;
|
||||
|
||||
/// Encodes a 64-bit signed int to the current stream.
|
||||
virtual void encodeLong(int64_t l) = 0;
|
||||
|
||||
/// Encodes a single-precision floating point number to the current stream.
|
||||
virtual void encodeFloat(float f) = 0;
|
||||
|
||||
/// Encodes a double-precision floating point number to the current stream.
|
||||
virtual void encodeDouble(double d) = 0;
|
||||
|
||||
/// Encodes a UTF-8 string to the current stream.
|
||||
virtual void encodeString(const std::string &s) = 0;
|
||||
|
||||
/**
|
||||
* Encodes arbitrary binary data into the current stream as Avro "bytes"
|
||||
* data type.
|
||||
* \param bytes Where the data is
|
||||
* \param len Number of bytes at \p bytes.
|
||||
*/
|
||||
virtual void encodeBytes(const uint8_t *bytes, size_t len) = 0;
|
||||
|
||||
/**
|
||||
* Encodes arbitrary binary data into the current stream as Avro "bytes"
|
||||
* data type.
|
||||
* \param bytes The data.
|
||||
*/
|
||||
void encodeBytes(const std::vector<uint8_t> &bytes) {
|
||||
uint8_t b = 0;
|
||||
encodeBytes(bytes.empty() ? &b : bytes.data(), bytes.size());
|
||||
}
|
||||
|
||||
/// Encodes fixed length binary to the current stream.
|
||||
virtual void encodeFixed(const uint8_t *bytes, size_t len) = 0;
|
||||
|
||||
/**
|
||||
* Encodes an Avro data type Fixed.
|
||||
* \param bytes The fixed, the length of which is taken as the size
|
||||
* of fixed.
|
||||
*/
|
||||
void encodeFixed(const std::vector<uint8_t> &bytes) {
|
||||
encodeFixed(bytes.data(), bytes.size());
|
||||
}
|
||||
|
||||
/// Encodes enum to the current stream.
|
||||
virtual void encodeEnum(size_t e) = 0;
|
||||
|
||||
/// Indicates that an array of items is being encoded.
|
||||
virtual void arrayStart() = 0;
|
||||
|
||||
/// Indicates that the current array of items has ended.
|
||||
virtual void arrayEnd() = 0;
|
||||
|
||||
/// Indicates that a map of items is being encoded.
|
||||
virtual void mapStart() = 0;
|
||||
|
||||
/// Indicates that the current map of items has ended.
|
||||
virtual void mapEnd() = 0;
|
||||
|
||||
/// Indicates that count number of items are to follow in the current array
|
||||
/// or map.
|
||||
virtual void setItemCount(size_t count) = 0;
|
||||
|
||||
/// Marks a beginning of an item in the current array or map.
|
||||
virtual void startItem() = 0;
|
||||
|
||||
/// Encodes a branch of a union. The actual value is to follow.
|
||||
virtual void encodeUnionIndex(size_t e) = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* Shared pointer to Encoder.
|
||||
*/
|
||||
using EncoderPtr = std::shared_ptr<Encoder>;
|
||||
|
||||
/**
|
||||
* Returns an encoder that can encode binary Avro standard.
|
||||
*/
|
||||
AVRO_DECL EncoderPtr binaryEncoder();
|
||||
|
||||
/**
|
||||
* Returns an encoder that validates sequence of calls to an underlying
|
||||
* Encoder against the given schema.
|
||||
*/
|
||||
AVRO_DECL EncoderPtr validatingEncoder(const ValidSchema &schema,
|
||||
const EncoderPtr &base);
|
||||
|
||||
/**
|
||||
* Returns an encoder that encodes Avro standard for JSON.
|
||||
*/
|
||||
AVRO_DECL EncoderPtr jsonEncoder(const ValidSchema &schema);
|
||||
|
||||
/**
|
||||
* Returns an encoder that encodes Avro standard for pretty printed JSON.
|
||||
*/
|
||||
AVRO_DECL EncoderPtr jsonPrettyEncoder(const ValidSchema &schema);
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Exception_hh__
|
||||
#define avro_Exception_hh__
|
||||
|
||||
#include "Config.hh"
|
||||
#include <fmt/core.h>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// Wrapper for std::runtime_error that provides convenience constructor
|
||||
/// for formatted messages
|
||||
|
||||
class AVRO_DECL Exception : public virtual std::runtime_error {
|
||||
public:
|
||||
explicit Exception(const std::string &msg) : std::runtime_error(msg) {}
|
||||
|
||||
template<typename... Args>
|
||||
Exception(fmt::format_string<Args...> fmt, Args &&...args)
|
||||
: std::runtime_error(fmt::format(fmt, std::forward<Args>(args)...)) {}
|
||||
};
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,152 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Generic_hh__
|
||||
#define avro_Generic_hh__
|
||||
|
||||
#include <boost/utility.hpp>
|
||||
|
||||
#include "Config.hh"
|
||||
#include "Decoder.hh"
|
||||
#include "Encoder.hh"
|
||||
#include "GenericDatum.hh"
|
||||
#include "Types.hh"
|
||||
|
||||
namespace avro {
|
||||
/**
|
||||
* A utility class to read generic datum from decoders.
|
||||
*/
|
||||
class AVRO_DECL GenericReader : boost::noncopyable {
|
||||
const ValidSchema schema_;
|
||||
const bool isResolving_;
|
||||
const DecoderPtr decoder_;
|
||||
|
||||
static void read(GenericDatum &datum, Decoder &d, bool isResolving);
|
||||
|
||||
public:
|
||||
/**
|
||||
* Constructs a reader for the given schema using the given decoder.
|
||||
*/
|
||||
GenericReader(ValidSchema s, const DecoderPtr &decoder);
|
||||
|
||||
/**
|
||||
* Constructs a reader for the given reader's schema \c readerSchema
|
||||
* using the given
|
||||
* decoder which holds data matching writer's schema \c writerSchema.
|
||||
*/
|
||||
GenericReader(const ValidSchema &writerSchema,
|
||||
const ValidSchema &readerSchema, const DecoderPtr &decoder);
|
||||
|
||||
/**
|
||||
* Reads a value off the decoder.
|
||||
*/
|
||||
void read(GenericDatum &datum) const;
|
||||
|
||||
/**
|
||||
* Drains any residual bytes in the input stream (e.g. because
|
||||
* reader's schema has no use of them) and return unused bytes
|
||||
* back to the underlying input stream.
|
||||
*/
|
||||
void drain() {
|
||||
decoder_->drain();
|
||||
}
|
||||
/**
|
||||
* Reads a generic datum from the stream, using the given schema.
|
||||
*/
|
||||
static void read(Decoder &d, GenericDatum &g);
|
||||
|
||||
/**
|
||||
* Reads a generic datum from the stream, using the given schema.
|
||||
*/
|
||||
static void read(Decoder &d, GenericDatum &g, const ValidSchema &s);
|
||||
};
|
||||
|
||||
/**
|
||||
* A utility class to write generic datum to encoders.
|
||||
*/
|
||||
class AVRO_DECL GenericWriter : boost::noncopyable {
|
||||
const ValidSchema schema_;
|
||||
const EncoderPtr encoder_;
|
||||
|
||||
static void write(const GenericDatum &datum, Encoder &e);
|
||||
|
||||
public:
|
||||
/**
|
||||
* Constructs a writer for the given schema using the given encoder.
|
||||
*/
|
||||
GenericWriter(ValidSchema s, EncoderPtr encoder);
|
||||
|
||||
/**
|
||||
* Writes a value onto the encoder.
|
||||
*/
|
||||
void write(const GenericDatum &datum) const;
|
||||
|
||||
/**
|
||||
* Writes a generic datum on to the stream.
|
||||
*/
|
||||
static void write(Encoder &e, const GenericDatum &g);
|
||||
|
||||
/**
|
||||
* Writes a generic datum on to the stream, using the given schema.
|
||||
* Retained for backward compatibility.
|
||||
*/
|
||||
static void write(Encoder &e, const GenericDatum &g, const ValidSchema &) {
|
||||
write(e, g);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct codec_traits;
|
||||
|
||||
/**
|
||||
* Specialization of codec_traits for Generic datum along with its schema.
|
||||
* This is maintained for compatibility with old code. Please use the
|
||||
* cleaner codec_traits<GenericDatum> instead.
|
||||
*/
|
||||
template<>
|
||||
struct codec_traits<std::pair<ValidSchema, GenericDatum>> {
|
||||
/** Encodes */
|
||||
static void encode(Encoder &e,
|
||||
const std::pair<ValidSchema, GenericDatum> &p) {
|
||||
GenericWriter::write(e, p.second, p.first);
|
||||
}
|
||||
|
||||
/** Decodes */
|
||||
static void decode(Decoder &d, std::pair<ValidSchema, GenericDatum> &p) {
|
||||
GenericReader::read(d, p.second, p.first);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Specialization of codec_traits for GenericDatum.
|
||||
*/
|
||||
template<>
|
||||
struct codec_traits<GenericDatum> {
|
||||
/** Encodes */
|
||||
static void encode(Encoder &e, const GenericDatum &g) {
|
||||
GenericWriter::write(e, g);
|
||||
}
|
||||
|
||||
/** Decodes */
|
||||
static void decode(Decoder &d, GenericDatum &g) {
|
||||
GenericReader::read(d, g);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace avro
|
||||
#endif
|
||||
|
|
@ -0,0 +1,559 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_GenericDatum_hh__
|
||||
#define avro_GenericDatum_hh__
|
||||
|
||||
#include <any>
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "LogicalType.hh"
|
||||
#include "Node.hh"
|
||||
#include "ValidSchema.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
/**
|
||||
* Generic datum which can hold any Avro type. The datum has a type
|
||||
* and a value. The type is one of the Avro data types. The C++ type for
|
||||
* value corresponds to the Avro type.
|
||||
* \li An Avro <tt>null</tt> corresponds to no C++ type. It is illegal to
|
||||
* to try to access values for <tt>null</tt>.
|
||||
* \li Avro <tt>boolean</tt> maps to C++ <tt>bool</tt>
|
||||
* \li Avro <tt>int</tt> maps to C++ <tt>int32_t</tt>.
|
||||
* \li Avro <tt>long</tt> maps to C++ <tt>int64_t</tt>.
|
||||
* \li Avro <tt>float</tt> maps to C++ <tt>float</tt>.
|
||||
* \li Avro <tt>double</tt> maps to C++ <tt>double</tt>.
|
||||
* \li Avro <tt>string</tt> maps to C++ <tt>std::string</tt>.
|
||||
* \li Avro <tt>bytes</tt> maps to C++ <tt>std::vector<uint_t></tt>.
|
||||
* \li Avro <tt>fixed</tt> maps to C++ class <tt>GenericFixed</tt>.
|
||||
* \li Avro <tt>enum</tt> maps to C++ class <tt>GenericEnum</tt>.
|
||||
* \li Avro <tt>array</tt> maps to C++ class <tt>GenericArray</tt>.
|
||||
* \li Avro <tt>map</tt> maps to C++ class <tt>GenericMap</tt>.
|
||||
* \li There is no C++ type corresponding to Avro <tt>union</tt>. The
|
||||
* object should have the C++ type corresponding to one of the constituent
|
||||
* types of the union.
|
||||
*
|
||||
*/
|
||||
class AVRO_DECL GenericDatum {
|
||||
protected:
|
||||
Type type_;
|
||||
LogicalType logicalType_;
|
||||
std::any value_;
|
||||
|
||||
explicit GenericDatum(Type t)
|
||||
: type_(t), logicalType_(LogicalType::NONE) {}
|
||||
|
||||
GenericDatum(Type t, LogicalType logicalType)
|
||||
: type_(t), logicalType_(logicalType) {}
|
||||
|
||||
template<typename T>
|
||||
GenericDatum(Type t, LogicalType logicalType, const T &v)
|
||||
: type_(t), logicalType_(logicalType), value_(v) {}
|
||||
|
||||
void init(const NodePtr &schema);
|
||||
|
||||
public:
|
||||
/**
|
||||
* The avro data type this datum holds.
|
||||
*/
|
||||
Type type() const;
|
||||
|
||||
/**
|
||||
* The avro logical type that augments the main data type this datum holds.
|
||||
*/
|
||||
LogicalType logicalType() const;
|
||||
|
||||
/**
|
||||
* Returns the value held by this datum.
|
||||
* T The type for the value. This must correspond to the
|
||||
* avro type returned by type().
|
||||
*/
|
||||
template<typename T>
|
||||
const T &value() const;
|
||||
|
||||
/**
|
||||
* Returns the reference to the value held by this datum, which
|
||||
* can be used to change the contents. Please note that only
|
||||
* value can be changed, the data type of the value held cannot
|
||||
* be changed.
|
||||
*
|
||||
* T The type for the value. This must correspond to the
|
||||
* avro type returned by type().
|
||||
*/
|
||||
template<typename T>
|
||||
T &value();
|
||||
|
||||
/**
|
||||
* Returns true if and only if this datum is a union.
|
||||
*/
|
||||
bool isUnion() const { return type_ == AVRO_UNION; }
|
||||
|
||||
/**
|
||||
* Returns the index of the current branch, if this is a union.
|
||||
* \sa isUnion().
|
||||
*/
|
||||
size_t unionBranch() const;
|
||||
|
||||
/**
|
||||
* Selects a new branch in the union if this is a union.
|
||||
* \sa isUnion().
|
||||
*/
|
||||
void selectBranch(size_t branch);
|
||||
|
||||
/// Makes a new AVRO_NULL datum.
|
||||
GenericDatum() : type_(AVRO_NULL), logicalType_(LogicalType::NONE) {}
|
||||
|
||||
/// Makes a new AVRO_BOOL datum whose value is of type bool.
|
||||
/// We don't make this explicit constructor because we want to allow automatic conversion
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
GenericDatum(bool v)
|
||||
: type_(AVRO_BOOL), logicalType_(LogicalType::NONE), value_(v) {}
|
||||
|
||||
/// Makes a new AVRO_INT datum whose value is of type int32_t.
|
||||
/// We don't make this explicit constructor because we want to allow automatic conversion
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
GenericDatum(int32_t v)
|
||||
: type_(AVRO_INT), logicalType_(LogicalType::NONE), value_(v) {}
|
||||
|
||||
/// Makes a new AVRO_LONG datum whose value is of type int64_t.
|
||||
/// We don't make this explicit constructor because we want to allow automatic conversion
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
GenericDatum(int64_t v)
|
||||
: type_(AVRO_LONG), logicalType_(LogicalType::NONE), value_(v) {}
|
||||
|
||||
/// Makes a new AVRO_FLOAT datum whose value is of type float.
|
||||
/// We don't make this explicit constructor because we want to allow automatic conversion
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
GenericDatum(float v)
|
||||
: type_(AVRO_FLOAT), logicalType_(LogicalType::NONE), value_(v) {}
|
||||
|
||||
/// Makes a new AVRO_DOUBLE datum whose value is of type double.
|
||||
/// We don't make this explicit constructor because we want to allow automatic conversion
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
GenericDatum(double v)
|
||||
: type_(AVRO_DOUBLE), logicalType_(LogicalType::NONE), value_(v) {}
|
||||
|
||||
/// Makes a new AVRO_STRING datum whose value is of type std::string.
|
||||
/// We don't make this explicit constructor because we want to allow automatic conversion
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
GenericDatum(const std::string &v)
|
||||
: type_(AVRO_STRING), logicalType_(LogicalType::NONE), value_(v) {}
|
||||
|
||||
/// Makes a new AVRO_BYTES datum whose value is of type
|
||||
/// std::vector<uint8_t>.
|
||||
/// We don't make this explicit constructor because we want to allow automatic conversion
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
GenericDatum(const std::vector<uint8_t> &v) : type_(AVRO_BYTES), logicalType_(LogicalType::NONE), value_(v) {}
|
||||
|
||||
/**
|
||||
* Constructs a datum corresponding to the given avro type.
|
||||
* The value will the appropriate default corresponding to the
|
||||
* data type.
|
||||
* \param schema The schema that defines the avro type.
|
||||
*/
|
||||
/// We don't make this explicit constructor because we want to allow automatic conversion
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
GenericDatum(const NodePtr &schema);
|
||||
|
||||
/**
|
||||
* Constructs a datum corresponding to the given avro type and set
|
||||
* the value.
|
||||
* \param schema The schema that defines the avro type.
|
||||
* \param v The value for this type.
|
||||
*/
|
||||
template<typename T>
|
||||
GenericDatum(const NodePtr &schema, const T &v) : type_(schema->type()), logicalType_(schema->logicalType()) {
|
||||
init(schema);
|
||||
*std::any_cast<T>(&value_) = v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a datum corresponding to the given avro type.
|
||||
* The value will the appropriate default corresponding to the
|
||||
* data type.
|
||||
* \param schema The schema that defines the avro type.
|
||||
*/
|
||||
explicit GenericDatum(const ValidSchema &schema);
|
||||
};
|
||||
|
||||
/**
|
||||
* The base class for all generic type for containers.
|
||||
*/
|
||||
class AVRO_DECL GenericContainer {
|
||||
NodePtr schema_;
|
||||
static void assertType(const NodePtr &schema, Type type);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Constructs a container corresponding to the given schema.
|
||||
*/
|
||||
GenericContainer(Type type, const NodePtr &s) : schema_(s) {
|
||||
assertType(s, type);
|
||||
}
|
||||
|
||||
public:
|
||||
/// Returns the schema for this object
|
||||
const NodePtr &schema() const {
|
||||
return schema_;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Generic container for unions.
|
||||
*/
|
||||
class AVRO_DECL GenericUnion : public GenericContainer {
|
||||
size_t curBranch_;
|
||||
GenericDatum datum_;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Constructs a generic union corresponding to the given schema \p schema,
|
||||
* and the given value. The schema should be of Avro type union
|
||||
* and the value should correspond to one of the branches of the union.
|
||||
*/
|
||||
explicit GenericUnion(const NodePtr &schema) : GenericContainer(AVRO_UNION, schema), curBranch_(schema->leaves()) {
|
||||
selectBranch(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the current branch.
|
||||
*/
|
||||
size_t currentBranch() const { return curBranch_; }
|
||||
|
||||
/**
|
||||
* Selects a new branch. The type for the value is changed accordingly.
|
||||
* \param branch The index for the selected branch.
|
||||
*/
|
||||
void selectBranch(size_t branch) {
|
||||
if (curBranch_ != branch) {
|
||||
datum_ = GenericDatum(schema()->leafAt(branch));
|
||||
curBranch_ = branch;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the datum corresponding to the currently selected branch
|
||||
* in this union.
|
||||
*/
|
||||
GenericDatum &datum() {
|
||||
return datum_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the datum corresponding to the currently selected branch
|
||||
* in this union.
|
||||
*/
|
||||
const GenericDatum &datum() const {
|
||||
return datum_;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* The generic container for Avro records.
|
||||
*/
|
||||
class AVRO_DECL GenericRecord : public GenericContainer {
|
||||
std::vector<GenericDatum> fields_;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Constructs a generic record corresponding to the given schema \p schema,
|
||||
* which should be of Avro type record.
|
||||
*/
|
||||
explicit GenericRecord(const NodePtr &schema);
|
||||
|
||||
/**
|
||||
* Returns the number of fields in the current record.
|
||||
*/
|
||||
size_t fieldCount() const {
|
||||
return fields_.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns index of the field with the given name \p name
|
||||
*/
|
||||
size_t fieldIndex(const std::string &name) const {
|
||||
size_t index = 0;
|
||||
if (!schema()->nameIndex(name, index)) {
|
||||
throw Exception("Invalid field name: " + name);
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if a field with the given name \p name is located in this r
|
||||
* false otherwise
|
||||
*/
|
||||
bool hasField(const std::string &name) const {
|
||||
size_t index = 0;
|
||||
return schema()->nameIndex(name, index);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the field with the given name \p name.
|
||||
*/
|
||||
const GenericDatum &field(const std::string &name) const {
|
||||
return fieldAt(fieldIndex(name));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the reference to the field with the given name \p name,
|
||||
* which can be used to change the contents.
|
||||
*/
|
||||
GenericDatum &field(const std::string &name) {
|
||||
return fieldAt(fieldIndex(name));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the field at the given position \p pos.
|
||||
*/
|
||||
const GenericDatum &fieldAt(size_t pos) const {
|
||||
return fields_[pos];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the reference to the field at the given position \p pos,
|
||||
* which can be used to change the contents.
|
||||
*/
|
||||
GenericDatum &fieldAt(size_t pos) {
|
||||
return fields_[pos];
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces the field at the given position \p pos with \p v.
|
||||
*/
|
||||
void setFieldAt(size_t pos, const GenericDatum &v) {
|
||||
// assertSameType(v, schema()->leafAt(pos));
|
||||
fields_[pos] = v;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* The generic container for Avro arrays.
|
||||
*/
|
||||
class AVRO_DECL GenericArray : public GenericContainer {
|
||||
public:
|
||||
/**
|
||||
* The contents type for the array.
|
||||
*/
|
||||
typedef std::vector<GenericDatum> Value;
|
||||
|
||||
/**
|
||||
* Constructs a generic array corresponding to the given schema \p schema,
|
||||
* which should be of Avro type array.
|
||||
*/
|
||||
explicit GenericArray(const NodePtr &schema) : GenericContainer(AVRO_ARRAY, schema) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the contents of this array.
|
||||
*/
|
||||
const Value &value() const {
|
||||
return value_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the reference to the contents of this array.
|
||||
*/
|
||||
Value &value() {
|
||||
return value_;
|
||||
}
|
||||
|
||||
private:
|
||||
Value value_;
|
||||
};
|
||||
|
||||
/**
|
||||
* The generic container for Avro maps.
|
||||
*/
|
||||
class AVRO_DECL GenericMap : public GenericContainer {
|
||||
public:
|
||||
/**
|
||||
* The contents type for the map.
|
||||
*/
|
||||
typedef std::vector<std::pair<std::string, GenericDatum>> Value;
|
||||
|
||||
/**
|
||||
* Constructs a generic map corresponding to the given schema \p schema,
|
||||
* which should be of Avro type map.
|
||||
*/
|
||||
explicit GenericMap(const NodePtr &schema) : GenericContainer(AVRO_MAP, schema) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the contents of this map.
|
||||
*/
|
||||
const Value &value() const {
|
||||
return value_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the reference to the contents of this map.
|
||||
*/
|
||||
Value &value() {
|
||||
return value_;
|
||||
}
|
||||
|
||||
private:
|
||||
Value value_;
|
||||
};
|
||||
|
||||
/**
|
||||
* Generic container for Avro enum.
|
||||
*/
|
||||
class AVRO_DECL GenericEnum : public GenericContainer {
|
||||
size_t value_;
|
||||
|
||||
static size_t index(const NodePtr &schema, const std::string &symbol) {
|
||||
size_t result;
|
||||
if (schema->nameIndex(symbol, result)) {
|
||||
return result;
|
||||
}
|
||||
throw Exception("No such symbol");
|
||||
}
|
||||
|
||||
public:
|
||||
/**
|
||||
* Constructs a generic enum corresponding to the given schema \p schema,
|
||||
* which should be of Avro type enum.
|
||||
*/
|
||||
explicit GenericEnum(const NodePtr &schema) : GenericContainer(AVRO_ENUM, schema), value_(0) {
|
||||
}
|
||||
|
||||
GenericEnum(const NodePtr &schema, const std::string &symbol) : GenericContainer(AVRO_ENUM, schema), value_(index(schema, symbol)) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the symbol corresponding to the cardinal \p n. If the
|
||||
* value for \p n is not within the limits an exception is thrown.
|
||||
*/
|
||||
const std::string &symbol(size_t n) {
|
||||
if (n < schema()->names()) {
|
||||
return schema()->nameAt(n);
|
||||
}
|
||||
throw Exception("Not as many symbols");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the cardinal for the given symbol \c symbol. If the symbol
|
||||
* is not defined for this enum and exception is thrown.
|
||||
*/
|
||||
size_t index(const std::string &symbol) const {
|
||||
return index(schema(), symbol);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value for this enum corresponding to the given symbol \c symbol.
|
||||
*/
|
||||
size_t set(const std::string &symbol) {
|
||||
return value_ = index(symbol);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value for this enum corresponding to the given cardinal \c n.
|
||||
*/
|
||||
void set(size_t n) {
|
||||
if (n < schema()->names()) {
|
||||
value_ = n;
|
||||
return;
|
||||
}
|
||||
throw Exception("Not as many symbols");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the cardinal for the current value of this enum.
|
||||
*/
|
||||
size_t value() const {
|
||||
return value_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the symbol for the current value of this enum.
|
||||
*/
|
||||
const std::string &symbol() const {
|
||||
return schema()->nameAt(value_);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Generic container for Avro fixed.
|
||||
*/
|
||||
class AVRO_DECL GenericFixed : public GenericContainer {
|
||||
std::vector<uint8_t> value_;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Constructs a generic enum corresponding to the given schema \p schema,
|
||||
* which should be of Avro type fixed.
|
||||
*/
|
||||
explicit GenericFixed(const NodePtr &schema) : GenericContainer(AVRO_FIXED, schema) {
|
||||
value_.resize(schema->fixedSize());
|
||||
}
|
||||
|
||||
GenericFixed(const NodePtr &schema, const std::vector<uint8_t> &v);
|
||||
|
||||
/**
|
||||
* Returns the contents of this fixed.
|
||||
*/
|
||||
const std::vector<uint8_t> &value() const {
|
||||
return value_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the reference to the contents of this fixed.
|
||||
*/
|
||||
std::vector<uint8_t> &value() {
|
||||
return value_;
|
||||
}
|
||||
};
|
||||
|
||||
// For a union, reports the type of the datum in the selected branch;
// otherwise reports this datum's own type.
inline Type GenericDatum::type() const {
    if (type_ != AVRO_UNION) {
        return type_;
    }
    return std::any_cast<GenericUnion>(&value_)->datum().type();
}
|
||||
|
||||
// For a union, reports the logical type of the datum in the selected
// branch; otherwise reports this datum's own logical type.
inline LogicalType GenericDatum::logicalType() const {
    if (type_ != AVRO_UNION) {
        return logicalType_;
    }
    return std::any_cast<GenericUnion>(&value_)->datum().logicalType();
}
|
||||
|
||||
template<typename T>
|
||||
T &GenericDatum::value() {
|
||||
return (type_ == AVRO_UNION) ? std::any_cast<GenericUnion>(&value_)->datum().value<T>()
|
||||
: *std::any_cast<T>(&value_);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
const T &GenericDatum::value() const {
|
||||
return (type_ == AVRO_UNION) ? std::any_cast<GenericUnion>(&value_)->datum().value<T>()
|
||||
: *std::any_cast<T>(&value_);
|
||||
}
|
||||
|
||||
inline size_t GenericDatum::unionBranch() const {
|
||||
return std::any_cast<GenericUnion>(&value_)->currentBranch();
|
||||
}
|
||||
|
||||
inline void GenericDatum::selectBranch(size_t branch) {
|
||||
std::any_cast<GenericUnion>(&value_)->selectBranch(branch);
|
||||
}
|
||||
|
||||
} // namespace avro
|
||||
#endif // avro_GenericDatum_hh__
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Layout_hh__
|
||||
#define avro_Layout_hh__
|
||||
|
||||
#include "Config.hh"
|
||||
#include <boost/noncopyable.hpp>
|
||||
|
||||
/// \file Layout.hh
|
||||
///
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// Non-copyable polymorphic base class that records an offset fixed at
/// construction time.
class AVRO_DECL Layout : private boost::noncopyable {
protected:
    /// Only derived classes may construct a Layout; \p offset defaults to 0.
    explicit Layout(size_t offset = 0)
        : offset_(offset) {
    }

public:
    virtual ~Layout() = default;

    /// Returns the offset given at construction.
    size_t offset() const { return offset_; }

private:
    const size_t offset_;
};
|
||||
|
||||
/// Layout that adds nothing to its base beyond a public constructor.
class AVRO_DECL PrimitiveLayout : public Layout {
public:
    /// Creates a layout at \p offset (0 by default).
    explicit PrimitiveLayout(size_t offset = 0)
        : Layout(offset) {
    }
};
|
||||
|
||||
/// Layout that owns an ordered list of child layouts.
class AVRO_DECL CompoundLayout : public Layout {
public:
    /// Creates an empty compound layout at \p offset (0 by default).
    explicit CompoundLayout(size_t offset = 0)
        : Layout(offset) {
    }

    /// Appends \p layout to the children. Ownership is moved out of the
    /// caller's pointer, which is left empty afterwards.
    void add(std::unique_ptr<Layout> &layout) {
        layouts_.emplace_back(std::move(layout));
    }

    /// Returns the child at position \p idx. Access is range-checked by
    /// std::vector::at.
    const Layout &at(size_t idx) const {
        const std::unique_ptr<Layout> &child = layouts_.at(idx);
        return *child;
    }

private:
    std::vector<std::unique_ptr<Layout>> layouts_;
};
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_LogicalType_hh__
|
||||
#define avro_LogicalType_hh__
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "Config.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
class AVRO_DECL LogicalType {
|
||||
public:
|
||||
enum Type {
|
||||
NONE,
|
||||
DECIMAL,
|
||||
DATE,
|
||||
TIME_MILLIS,
|
||||
TIME_MICROS,
|
||||
TIMESTAMP_MILLIS,
|
||||
TIMESTAMP_MICROS,
|
||||
DURATION,
|
||||
UUID
|
||||
};
|
||||
|
||||
explicit LogicalType(Type type);
|
||||
|
||||
Type type() const;
|
||||
|
||||
// Precision and scale can only be set for the DECIMAL logical type.
|
||||
// Precision must be positive and scale must be either positive or zero. The
|
||||
// setters will throw an exception if they are called on any type other
|
||||
// than DECIMAL.
|
||||
void setPrecision(int32_t precision);
|
||||
int32_t precision() const { return precision_; }
|
||||
void setScale(int32_t scale);
|
||||
int32_t scale() const { return scale_; }
|
||||
|
||||
void printJson(std::ostream &os) const;
|
||||
|
||||
private:
|
||||
Type type_;
|
||||
int32_t precision_;
|
||||
int32_t scale_;
|
||||
};
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,236 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Node_hh__
|
||||
#define avro_Node_hh__
|
||||
|
||||
#include <cassert>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "Config.hh"
|
||||
#include "CustomAttributes.hh"
|
||||
#include "Exception.hh"
|
||||
#include "LogicalType.hh"
|
||||
#include "SchemaResolution.hh"
|
||||
#include "Types.hh"
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
|
||||
namespace avro {
|
||||
|
||||
class Node;
|
||||
class GenericDatum;
|
||||
|
||||
using NodePtr = std::shared_ptr<Node>;
|
||||
|
||||
class AVRO_DECL Name {
|
||||
struct Aliases;
|
||||
|
||||
std::string ns_;
|
||||
std::string simpleName_;
|
||||
std::unique_ptr<Aliases> aliases_;
|
||||
|
||||
public:
|
||||
Name();
|
||||
explicit Name(const std::string& name);
|
||||
Name(std::string simpleName, std::string ns);
|
||||
Name(const Name& other);
|
||||
Name& operator=(const Name& other);
|
||||
Name(Name&& other);
|
||||
Name& operator=(Name&& other);
|
||||
~Name();
|
||||
|
||||
std::string fullname() const;
|
||||
const std::string& ns() const {
|
||||
return ns_;
|
||||
}
|
||||
const std::string& simpleName() const {
|
||||
return simpleName_;
|
||||
}
|
||||
const std::vector<std::string>& aliases() const;
|
||||
|
||||
void ns(std::string n) {
|
||||
ns_ = std::move(n);
|
||||
}
|
||||
void simpleName(std::string n) {
|
||||
simpleName_ = std::move(n);
|
||||
}
|
||||
void fullname(const std::string& n);
|
||||
void addAlias(const std::string& alias);
|
||||
|
||||
bool operator<(const Name& n) const;
|
||||
void check() const;
|
||||
bool operator==(const Name& n) const;
|
||||
bool operator!=(const Name& n) const {
|
||||
return !((*this) == n);
|
||||
}
|
||||
bool equalOrAliasedBy(const Name& n) const;
|
||||
void clear();
|
||||
explicit operator std::string() const {
|
||||
return fullname();
|
||||
}
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, const Name& n) {
|
||||
return os << n.fullname();
|
||||
}
|
||||
|
||||
/// Node is the building block for parse trees. Each node represents an avro
|
||||
/// type. Compound types have leaf nodes that represent the types they are
|
||||
/// composed of.
|
||||
///
|
||||
/// The user does not use the Node object directly, they interface with Schema
|
||||
/// objects.
|
||||
///
|
||||
/// The Node object uses reference-counted pointers. This is so that schemas
|
||||
/// may be reused in other schemas, without needing to worry about memory
|
||||
/// deallocation for nodes that are added to multiple schema parse trees.
|
||||
///
|
||||
/// Node has minimal implementation, serving as an abstract base class for
|
||||
/// different node types.
|
||||
///
|
||||
|
||||
class AVRO_DECL Node : private boost::noncopyable {
public:
    /// Creates an unlocked node of the given avro type with no logical type.
    explicit Node(Type type)
        : type_(type), logicalType_(LogicalType::NONE), locked_(false) {}

    virtual ~Node();

    /// The avro type fixed at construction.
    Type type() const {
        return type_;
    }

    /// The logical type currently attached to this node.
    LogicalType logicalType() const {
        return logicalType_;
    }

    /// Replaces the logical type; defined out of line.
    void setLogicalType(LogicalType logicalType);

    /// Marks the node as locked. After this, every mutator that calls
    /// checkLock() throws. This class declares no way to unlock again.
    void lock() {
        locked_ = true;
    }

    /// True once lock() has been called.
    bool locked() const {
        return locked_;
    }

    /// Whether this kind of node carries a name at all.
    virtual bool hasName() const = 0;

    /// Sets the node's name. Throws if the node is locked; the name is passed
    /// through checkName() before being handed to the implementation.
    void setName(const Name& name) {
        checkLock();
        checkName(name);
        doSetName(name);
    }
    virtual const Name& name() const = 0;

    virtual const std::string& getDoc() const = 0;

    /// Sets the doc string. Throws if the node is locked.
    void setDoc(const std::string& doc) {
        checkLock();
        doSetDoc(doc);
    }

    /// Appends a leaf node. Throws if the node is locked.
    void addLeaf(const NodePtr& newLeaf) {
        checkLock();
        doAddLeaf(newLeaf);
    }
    virtual size_t leaves() const = 0;
    virtual const NodePtr& leafAt(size_t index) const = 0;

    /// Default value of the leaf at the given index. This base implementation
    /// always throws; subclasses that store defaults override it.
    virtual const GenericDatum& defaultValueAt(size_t index) {
        throw Exception("No default value at: {}", index);
    }

    /// Appends a leaf name. Throws if the node is locked; the string is first
    /// wrapped in a Name and passed through checkName().
    void addName(const std::string& name) {
        checkLock();
        checkName(Name(name));
        doAddName(name);
    }
    virtual size_t names() const = 0;
    virtual const std::string& nameAt(size_t index) const = 0;
    virtual bool nameIndex(const std::string& name, size_t& index) const = 0;

    /// Sets the fixed size. Throws if the node is locked.
    void setFixedSize(size_t size) {
        checkLock();
        doSetFixedSize(size);
    }
    virtual size_t fixedSize() const = 0;

    /// Appends a set of custom attributes. Throws if the node is locked.
    void addCustomAttributesForField(const CustomAttributes& customAttributes) {
        checkLock();
        doAddCustomAttribute(customAttributes);
    }

    virtual bool isValid() const = 0;

    virtual SchemaResolution resolve(const Node& reader) const = 0;

    virtual void printJson(std::ostream& os, size_t depth) const = 0;

    virtual void printBasicInfo(std::ostream& os) const = 0;

    virtual void setLeafToSymbolic(size_t index, const NodePtr& node) = 0;

    // Serialize the default value GenericDatum g for the node contained
    // in a record node.
    virtual void printDefaultToJson(const GenericDatum& g,
                                    std::ostream& os,
                                    size_t depth) const = 0;

protected:
    /// Guard used by the public mutators: throws once the node is locked.
    void checkLock() const {
        if (locked()) {
            throw Exception("Cannot modify locked schema");
        }
    }

    /// Validation hook for names; by default delegates to Name::check().
    virtual void checkName(const Name& name) const {
        name.check();
    }

    // Implementation hooks invoked by the public mutators above after the
    // lock (and, where applicable, name) checks have passed.
    virtual void doSetName(const Name& name) = 0;
    virtual void doSetDoc(const std::string& name) = 0;

    virtual void doAddLeaf(const NodePtr& newLeaf) = 0;
    virtual void doAddName(const std::string& name) = 0;
    virtual void doSetFixedSize(size_t size) = 0;
    virtual void doAddCustomAttribute(const CustomAttributes& customAttributes) = 0;

private:
    const Type type_;
    LogicalType logicalType_;
    bool locked_;
};
|
||||
|
||||
} // namespace avro
|
||||
|
||||
namespace std {
|
||||
inline std::ostream& operator<<(std::ostream& os, const avro::Node& n) {
|
||||
n.printJson(os, 0);
|
||||
return os;
|
||||
}
|
||||
} // namespace std
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<avro::Name> : fmt::formatter<std::string> {
|
||||
template <typename FormatContext>
|
||||
auto format(const avro::Name& n, FormatContext& ctx) const {
|
||||
return fmt::formatter<std::string>::format(n.fullname(), ctx);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,201 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_NodeConcepts_hh__
|
||||
#define avro_NodeConcepts_hh__
|
||||
|
||||
#include "Config.hh"
|
||||
|
||||
#include "Exception.hh"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace avro {
|
||||
|
||||
///
|
||||
/// The concept classes are used to simplify NodeImpl. Since different types
|
||||
/// of avro types carry different attributes, such as names, or field names for
|
||||
/// record members. Using the concept class of NoAttribute vs Attribute, the
|
||||
/// NodeImpl object can enable/disable the attribute, but the code is the same
|
||||
/// in either case.
|
||||
///
|
||||
/// Furthermore, attributes may have different types, for example, most
|
||||
/// attributes are strings, but fixed types have a size attribute, which is
|
||||
/// integer.
|
||||
///
|
||||
/// Since compound types are composed of other types, the leaf attribute
|
||||
/// concepts extend a NodeImpl to include leaf nodes, and attributes for leaf
|
||||
/// nodes, which are used to build parse trees.
|
||||
///
|
||||
///
|
||||
|
||||
namespace concepts {
|
||||
|
||||
template<typename Attribute>
|
||||
struct NoAttribute {
|
||||
static const bool hasAttribute = false;
|
||||
|
||||
size_t size() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void add(const Attribute & /* attr */) {
|
||||
// There must be an add function for the generic NodeImpl, but the
|
||||
// Node APIs ensure that it is never called, the throw here is
|
||||
// just in case
|
||||
throw Exception("This type does not have attribute");
|
||||
}
|
||||
|
||||
const Attribute &get(size_t /* index */ = 0) const {
|
||||
// There must be an get function for the generic NodeImpl, but the
|
||||
// Node APIs ensure that it is never called, the throw here is
|
||||
// just in case
|
||||
throw Exception("This type does not have attribute");
|
||||
}
|
||||
|
||||
Attribute &get(size_t /* index */ = 0) {
|
||||
// There must be an get function for the generic NodeImpl, but the
|
||||
// Node APIs ensure that it is never called, the throw here is
|
||||
// just in case
|
||||
throw Exception("This type does not have attribute");
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Attribute>
|
||||
struct SingleAttribute {
|
||||
static const bool hasAttribute = true;
|
||||
|
||||
SingleAttribute() : attr_() {}
|
||||
|
||||
explicit SingleAttribute(const Attribute &a) : attr_(a) {}
|
||||
// copy constructing from another single attribute is allowed
|
||||
SingleAttribute(const SingleAttribute<Attribute> &rhs) : attr_(rhs.attr_) {}
|
||||
|
||||
// copy constructing from a no attribute is allowed
|
||||
explicit SingleAttribute(const NoAttribute<Attribute> &rhs) : attr_() {}
|
||||
|
||||
size_t size() const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
void add(const Attribute &attr) {
|
||||
attr_ = attr;
|
||||
}
|
||||
|
||||
const Attribute &get(size_t index = 0) const {
|
||||
if (index != 0) {
|
||||
throw Exception("SingleAttribute has only 1 value");
|
||||
}
|
||||
return attr_;
|
||||
}
|
||||
|
||||
Attribute &get(size_t index = 0) {
|
||||
if (index != 0) {
|
||||
throw Exception("SingleAttribute has only 1 value");
|
||||
}
|
||||
return attr_;
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename T>
|
||||
friend struct MultiAttribute;
|
||||
Attribute attr_;
|
||||
};
|
||||
|
||||
template<typename Attribute>
|
||||
struct MultiAttribute {
|
||||
static const bool hasAttribute = true;
|
||||
|
||||
MultiAttribute() = default;
|
||||
|
||||
// copy constructing from another single attribute is allowed, it
|
||||
// pushes the attribute
|
||||
explicit MultiAttribute(const SingleAttribute<Attribute> &rhs) {
|
||||
// since map is the only type that does this we know it's
|
||||
// final size will be two, so reserve
|
||||
attrs_.reserve(2);
|
||||
attrs_.push_back(rhs.attr_);
|
||||
}
|
||||
|
||||
MultiAttribute(const MultiAttribute<Attribute> &rhs) : attrs_(rhs.attrs_) {}
|
||||
|
||||
explicit MultiAttribute(const NoAttribute<Attribute> &rhs) {}
|
||||
|
||||
size_t size() const {
|
||||
return attrs_.size();
|
||||
}
|
||||
|
||||
void add(const Attribute &attr) {
|
||||
attrs_.push_back(attr);
|
||||
}
|
||||
|
||||
const Attribute &get(size_t index = 0) const {
|
||||
return attrs_.at(index);
|
||||
}
|
||||
|
||||
Attribute &get(size_t index) {
|
||||
return attrs_.at(index);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<Attribute> attrs_;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct NameIndexConcept {
|
||||
|
||||
bool lookup(const std::string &, size_t &) const {
|
||||
throw Exception("Name index does not exist");
|
||||
}
|
||||
|
||||
bool add(const ::std::string &, size_t) {
|
||||
throw Exception("Name index does not exist");
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct NameIndexConcept<MultiAttribute<std::string>> {
|
||||
using IndexMap = std::map<std::string, size_t>;
|
||||
|
||||
bool lookup(const std::string &name, size_t &index) const {
|
||||
auto iter = map_.find(name);
|
||||
if (iter == map_.end()) {
|
||||
return false;
|
||||
}
|
||||
index = iter->second;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool add(const ::std::string &name, size_t index) {
|
||||
bool added = false;
|
||||
auto lb = map_.lower_bound(name);
|
||||
if (lb == map_.end() || map_.key_comp()(name, lb->first)) {
|
||||
map_.insert(lb, IndexMap::value_type(name, index));
|
||||
added = true;
|
||||
}
|
||||
return added;
|
||||
}
|
||||
|
||||
private:
|
||||
IndexMap map_;
|
||||
};
|
||||
|
||||
} // namespace concepts
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,554 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_NodeImpl_hh__
|
||||
#define avro_NodeImpl_hh__
|
||||
|
||||
#include "Config.hh"
|
||||
#include "GenericDatum.hh"
|
||||
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
#include "CustomAttributes.hh"
|
||||
#include "Node.hh"
|
||||
#include "NodeConcepts.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// Implementation details for Node. NodeImpl represents all the avro types,
|
||||
/// whose properties are enabled and disabled by selecting concept classes.
|
||||
|
||||
template<
|
||||
class NameConcept,
|
||||
class LeavesConcept,
|
||||
class LeafNamesConcept,
|
||||
class MultiAttributesConcept,
|
||||
class SizeConcept>
|
||||
class NodeImpl : public Node {
|
||||
|
||||
protected:
|
||||
explicit NodeImpl(Type type) : Node(type),
|
||||
nameAttribute_(),
|
||||
docAttribute_(),
|
||||
leafAttributes_(),
|
||||
leafNameAttributes_(),
|
||||
customAttributes_(),
|
||||
sizeAttribute_() {}
|
||||
|
||||
NodeImpl(Type type,
|
||||
const NameConcept &name,
|
||||
const LeavesConcept &leaves,
|
||||
const LeafNamesConcept &leafNames,
|
||||
const MultiAttributesConcept &customAttributes,
|
||||
const SizeConcept &size) : Node(type),
|
||||
nameAttribute_(name),
|
||||
docAttribute_(),
|
||||
leafAttributes_(leaves),
|
||||
leafNameAttributes_(leafNames),
|
||||
customAttributes_(customAttributes),
|
||||
sizeAttribute_(size) {}
|
||||
|
||||
// Ctor with "doc"
|
||||
NodeImpl(Type type,
|
||||
const NameConcept &name,
|
||||
const concepts::SingleAttribute<std::string> &doc,
|
||||
const LeavesConcept &leaves,
|
||||
const LeafNamesConcept &leafNames,
|
||||
const MultiAttributesConcept &customAttributes,
|
||||
const SizeConcept &size) : Node(type),
|
||||
nameAttribute_(name),
|
||||
docAttribute_(doc),
|
||||
leafAttributes_(leaves),
|
||||
leafNameAttributes_(leafNames),
|
||||
customAttributes_(customAttributes),
|
||||
sizeAttribute_(size) {}
|
||||
|
||||
void swap(NodeImpl &impl) {
|
||||
std::swap(nameAttribute_, impl.nameAttribute_);
|
||||
std::swap(docAttribute_, impl.docAttribute_);
|
||||
std::swap(leafAttributes_, impl.leafAttributes_);
|
||||
std::swap(leafNameAttributes_, impl.leafNameAttributes_);
|
||||
std::swap(sizeAttribute_, impl.sizeAttribute_);
|
||||
std::swap(customAttributes_, impl.customAttributes_);
|
||||
std::swap(nameIndex_, impl.nameIndex_);
|
||||
}
|
||||
|
||||
bool hasName() const override {
|
||||
// e.g.: true for single and multi-attributes, false for no-attributes.
|
||||
return NameConcept::hasAttribute;
|
||||
}
|
||||
|
||||
void doSetName(const Name &name) override {
|
||||
nameAttribute_.add(name);
|
||||
}
|
||||
|
||||
const Name &name() const override {
|
||||
return nameAttribute_.get();
|
||||
}
|
||||
|
||||
void doSetDoc(const std::string &doc) override {
|
||||
docAttribute_.add(doc);
|
||||
}
|
||||
|
||||
const std::string &getDoc() const override {
|
||||
return docAttribute_.get();
|
||||
}
|
||||
|
||||
void doAddLeaf(const NodePtr &newLeaf) final {
|
||||
leafAttributes_.add(newLeaf);
|
||||
}
|
||||
|
||||
size_t leaves() const override {
|
||||
return leafAttributes_.size();
|
||||
}
|
||||
|
||||
const NodePtr &leafAt(size_t index) const override {
|
||||
return leafAttributes_.get(index);
|
||||
}
|
||||
|
||||
void doAddName(const std::string &name) override {
|
||||
if (!nameIndex_.add(name, leafNameAttributes_.size())) {
|
||||
throw Exception("Cannot add duplicate name: {}", name);
|
||||
}
|
||||
leafNameAttributes_.add(name);
|
||||
}
|
||||
|
||||
size_t names() const override {
|
||||
return leafNameAttributes_.size();
|
||||
}
|
||||
|
||||
const std::string &nameAt(size_t index) const override {
|
||||
return leafNameAttributes_.get(index);
|
||||
}
|
||||
|
||||
bool nameIndex(const std::string &name, size_t &index) const override {
|
||||
return nameIndex_.lookup(name, index);
|
||||
}
|
||||
|
||||
void doSetFixedSize(size_t size) override {
|
||||
sizeAttribute_.add(size);
|
||||
}
|
||||
|
||||
size_t fixedSize() const override {
|
||||
return sizeAttribute_.get();
|
||||
}
|
||||
|
||||
bool isValid() const override = 0;
|
||||
|
||||
void printBasicInfo(std::ostream &os) const override;
|
||||
|
||||
void setLeafToSymbolic(size_t index, const NodePtr &node) override;
|
||||
|
||||
void doAddCustomAttribute(const CustomAttributes &customAttributes) override {
|
||||
customAttributes_.add(customAttributes);
|
||||
}
|
||||
|
||||
SchemaResolution furtherResolution(const Node &reader) const {
|
||||
SchemaResolution match = RESOLVE_NO_MATCH;
|
||||
|
||||
if (reader.type() == AVRO_SYMBOLIC) {
|
||||
|
||||
// resolve the symbolic type, and check again
|
||||
const NodePtr &node = reader.leafAt(0);
|
||||
match = resolve(*node);
|
||||
} else if (reader.type() == AVRO_UNION) {
|
||||
|
||||
// in this case, need to see if there is an exact match for the
|
||||
// writer's type, or if not, the first one that can be promoted to a
|
||||
// match
|
||||
|
||||
for (size_t i = 0; i < reader.leaves(); ++i) {
|
||||
|
||||
const NodePtr &node = reader.leafAt(i);
|
||||
SchemaResolution thisMatch = resolve(*node);
|
||||
|
||||
// if matched then the search is done
|
||||
if (thisMatch == RESOLVE_MATCH) {
|
||||
match = thisMatch;
|
||||
break;
|
||||
}
|
||||
|
||||
// thisMatch is either no match, or promotable, this will set match to
|
||||
// promotable if it hasn't been set already
|
||||
if (match == RESOLVE_NO_MATCH) {
|
||||
match = thisMatch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return match;
|
||||
}
|
||||
|
||||
NameConcept nameAttribute_;
|
||||
|
||||
// Rem: NameConcept type is HasName (= SingleAttribute<Name>), we use std::string instead
|
||||
concepts::SingleAttribute<std::string> docAttribute_; /** Doc used to compare schemas */
|
||||
|
||||
LeavesConcept leafAttributes_;
|
||||
LeafNamesConcept leafNameAttributes_;
|
||||
MultiAttributesConcept customAttributes_;
|
||||
SizeConcept sizeAttribute_;
|
||||
concepts::NameIndexConcept<LeafNamesConcept> nameIndex_;
|
||||
};
|
||||
|
||||
using NoName = concepts::NoAttribute<Name>;
|
||||
using HasName = concepts::SingleAttribute<Name>;
|
||||
|
||||
using HasDoc = concepts::SingleAttribute<std::string>;
|
||||
|
||||
using NoLeaves = concepts::NoAttribute<NodePtr>;
|
||||
using SingleLeaf = concepts::SingleAttribute<NodePtr>;
|
||||
using MultiLeaves = concepts::MultiAttribute<NodePtr>;
|
||||
|
||||
using NoLeafNames = concepts::NoAttribute<std::string>;
|
||||
using LeafNames = concepts::MultiAttribute<std::string>;
|
||||
using MultiAttributes = concepts::MultiAttribute<CustomAttributes>;
|
||||
using NoAttributes = concepts::NoAttribute<CustomAttributes>;
|
||||
|
||||
using NoSize = concepts::NoAttribute<size_t>;
|
||||
using HasSize = concepts::SingleAttribute<size_t>;
|
||||
|
||||
using NodeImplPrimitive = NodeImpl<NoName, NoLeaves, NoLeafNames, MultiAttributes, NoSize>;
|
||||
using NodeImplSymbolic = NodeImpl<HasName, NoLeaves, NoLeafNames, NoAttributes, NoSize>;
|
||||
|
||||
using NodeImplRecord = NodeImpl<HasName, MultiLeaves, LeafNames, MultiAttributes, NoSize>;
|
||||
using NodeImplEnum = NodeImpl<HasName, NoLeaves, LeafNames, NoAttributes, NoSize>;
|
||||
using NodeImplArray = NodeImpl<NoName, SingleLeaf, NoLeafNames, NoAttributes, NoSize>;
|
||||
using NodeImplMap = NodeImpl<NoName, MultiLeaves, NoLeafNames, NoAttributes, NoSize>;
|
||||
using NodeImplUnion = NodeImpl<NoName, MultiLeaves, NoLeafNames, NoAttributes, NoSize>;
|
||||
using NodeImplFixed = NodeImpl<HasName, NoLeaves, NoLeafNames, NoAttributes, HasSize>;
|
||||
|
||||
/// Node built on NodeImplPrimitive: no name, no leaves, no leaf names and no
/// size; the avro type is whatever is passed to the constructor.
class AVRO_DECL NodePrimitive : public NodeImplPrimitive {
public:
    explicit NodePrimitive(Type type) : NodeImplPrimitive(type) {}

    SchemaResolution resolve(const Node &reader) const override;

    void printJson(std::ostream &os, size_t depth) const override;

    /// This node performs no structural checks and is always valid.
    bool isValid() const override {
        return true;
    }

    void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
};
|
||||
|
||||
/// A named reference to another node. The target is held through a weak
/// pointer, so this node does not keep the target alive.
class AVRO_DECL NodeSymbolic : public NodeImplSymbolic {
    using NodeWeakPtr = std::weak_ptr<Node>;

public:
    /// Creates a symbolic node with no name and no target.
    NodeSymbolic() : NodeImplSymbolic(AVRO_SYMBOLIC) {}

    /// Creates a symbolic node with a name but no target yet.
    explicit NodeSymbolic(const HasName &name)
        : NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoAttributes(), NoSize()) {}

    /// Creates a symbolic node with a name that refers to n.
    NodeSymbolic(const HasName &name, const NodePtr &n)
        : NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoAttributes(), NoSize()),
          actualNode_(n) {}
    SchemaResolution resolve(const Node &reader) const override;

    void printJson(std::ostream &os, size_t depth) const override;

    bool isValid() const override {
        return nameAttribute_.size() == 1;
    }

    void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;

    /// True while the referenced node can still be locked, i.e. a target was
    /// set and has not been destroyed.
    bool isSet() const {
        const NodePtr target = actualNode_.lock();
        return target != nullptr;
    }

    /// Returns the referenced node; throws if there is none or it has
    /// already been destroyed.
    NodePtr getNode() const {
        if (NodePtr target = actualNode_.lock()) {
            return target;
        }
        throw Exception("Could not follow symbol {}", name());
    }

    /// Points this symbol at node (stored as a weak reference).
    void setNode(const NodePtr &node) {
        actualNode_ = node;
    }

protected:
    NodeWeakPtr actualNode_;
};
|
||||
|
||||
/// Record node: a named node whose leaves are fields, each with a field name
/// and, optionally, aliases, a default value and custom attributes.
class AVRO_DECL NodeRecord : public NodeImplRecord {
    std::vector<std::vector<std::string>> fieldsAliases_;
    std::vector<GenericDatum> fieldsDefaultValues_;

public:
    /// Creates an empty record; no aliases or default values are stored.
    NodeRecord() : NodeImplRecord(AVRO_RECORD) {}

    NodeRecord(const HasName &name, const MultiLeaves &fields,
               const LeafNames &fieldsNames, std::vector<GenericDatum> dv);

    NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields,
               const LeafNames &fieldsNames, std::vector<GenericDatum> dv);

    NodeRecord(const HasName &name, const MultiLeaves &fields,
               const LeafNames &fieldsNames, std::vector<std::vector<std::string>> fieldsAliases,
               std::vector<GenericDatum> dv, const MultiAttributes &customAttributes);

    NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields,
               const LeafNames &fieldsNames, std::vector<std::vector<std::string>> fieldsAliases,
               std::vector<GenericDatum> dv, const MultiAttributes &customAttributes);

    /// Exchanges the complete state held by NodeImplRecord and this class
    /// with r.
    void swap(NodeRecord &r) {
        NodeImplRecord::swap(r);
        fieldsAliases_.swap(r.fieldsAliases_);
        fieldsDefaultValues_.swap(r.fieldsDefaultValues_);
    }

    SchemaResolution resolve(const Node &reader) const override;

    void printJson(std::ostream &os, size_t depth) const override;

    /// Valid when the record has a name, every field has a field name, and
    /// custom attributes are either absent or present once per field.
    bool isValid() const override {
        const bool hasSingleName = nameAttribute_.size() == 1;
        const bool everyFieldNamed = leafAttributes_.size() == leafNameAttributes_.size();
        const bool attributesConsistent =
            customAttributes_.size() == 0 || customAttributes_.size() == leafAttributes_.size();
        return hasSingleName && everyFieldNamed && attributesConsistent;
    }

    /// Returns the stored default value for the field at index.
    ///
    /// Throws Exception when no default is stored at that position. That
    /// includes records whose default-value vector is empty, such as one
    /// made by the default constructor above. The exception is the same one
    /// the base Node::defaultValueAt raises, where the previous unchecked
    /// indexing was undefined behavior.
    const GenericDatum &defaultValueAt(size_t index) override {
        if (index >= fieldsDefaultValues_.size()) {
            throw Exception("No default value at: {}", index);
        }
        return fieldsDefaultValues_[index];
    }

    void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
};
|
||||
|
||||
/// Enum node: a named node whose leaf names are the enum symbols.
class AVRO_DECL NodeEnum : public NodeImplEnum {
public:
    NodeEnum() : NodeImplEnum(AVRO_ENUM) {}

    /// Creates an enum from a name and its symbols, then builds the name
    /// index from the symbols. Throws if a symbol occurs more than once.
    NodeEnum(const HasName &name, const LeafNames &symbols)
        : NodeImplEnum(AVRO_ENUM, name, NoLeaves(), symbols, NoAttributes(), NoSize()) {
        const size_t symbolCount = leafNameAttributes_.size();
        for (size_t pos = 0; pos < symbolCount; ++pos) {
            const std::string &symbol = leafNameAttributes_.get(pos);
            if (!nameIndex_.add(symbol, pos)) {
                throw Exception("Cannot add duplicate enum: {}", symbol);
            }
        }
    }

    SchemaResolution resolve(const Node &reader) const override;

    void printJson(std::ostream &os, size_t depth) const override;

    /// Valid when the enum has a name and at least one symbol.
    bool isValid() const override {
        const bool hasSingleName = nameAttribute_.size() == 1;
        const bool hasSymbols = leafNameAttributes_.size() > 0;
        return hasSingleName && hasSymbols;
    }

    void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
};
|
||||
|
||||
/// Array node: an unnamed node with a single leaf, the item type.
class AVRO_DECL NodeArray : public NodeImplArray {
public:
    NodeArray() : NodeImplArray(AVRO_ARRAY) {}

    /// Creates an array whose single leaf is taken from items.
    explicit NodeArray(const SingleLeaf &items)
        : NodeImplArray(AVRO_ARRAY, NoName(), items, NoLeafNames(), NoAttributes(), NoSize()) {}

    SchemaResolution resolve(const Node &reader) const override;

    void printJson(std::ostream &os, size_t depth) const override;

    /// Valid when the leaf attribute reports exactly one leaf.
    bool isValid() const override {
        return leafAttributes_.size() == 1;
    }

    void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
};
|
||||
|
||||
/// Map node: an unnamed node with two leaves, the key (leaf 0) followed by
/// the value (leaf 1).
class AVRO_DECL NodeMap : public NodeImplMap {
public:
    NodeMap();

    /// Creates a map from its value type. A string-typed primitive node is
    /// created for the key and placed in front of the value.
    explicit NodeMap(const SingleLeaf &values)
        : NodeImplMap(AVRO_MAP, NoName(), MultiLeaves(values), NoLeafNames(), NoAttributes(), NoSize()) {
        // The leaves currently hold only the value; append the key node...
        NodePtr keyNode(new NodePrimitive(AVRO_STRING));
        doAddLeaf(keyNode);

        // ...then exchange the two so that the key goes before the value.
        NodePtr &first = leafAttributes_.get(0);
        NodePtr &second = leafAttributes_.get(1);
        std::swap(first, second);
    }

    SchemaResolution resolve(const Node &reader) const override;

    void printJson(std::ostream &os, size_t depth) const override;

    /// Valid when there are exactly two leaves.
    bool isValid() const override {
        return leafAttributes_.size() == 2;
    }

    void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
};
|
||||
|
||||
/// Union node: an unnamed node whose leaves are the branch types.
class AVRO_DECL NodeUnion : public NodeImplUnion {
public:
    NodeUnion() : NodeImplUnion(AVRO_UNION) {}

    /// Creates a union over the given branch types.
    explicit NodeUnion(const MultiLeaves &types)
        : NodeImplUnion(AVRO_UNION, NoName(), types, NoLeafNames(), NoAttributes(), NoSize()) {}

    SchemaResolution resolve(const Node &reader) const override;

    void printJson(std::ostream &os, size_t depth) const override;

    /// A union is valid when it has at least one branch and no two branches
    /// map to the same key. The key is a fixed type name for primitives,
    /// arrays and maps, and the node's full name for record, enum, union,
    /// fixed and symbolic branches. A branch of any other type makes the
    /// union invalid.
    bool isValid() const override {
        const size_t branchCount = leafAttributes_.size();
        if (branchCount < 1) {
            return false;
        }

        std::set<std::string> seen;
        for (size_t i = 0; i < branchCount; ++i) {
            const NodePtr &branch = leafAttributes_.get(i);

            std::string key;
            switch (branch->type()) {
                case AVRO_STRING: key = "string"; break;
                case AVRO_BYTES: key = "bytes"; break;
                case AVRO_INT: key = "int"; break;
                case AVRO_LONG: key = "long"; break;
                case AVRO_FLOAT: key = "float"; break;
                case AVRO_DOUBLE: key = "double"; break;
                case AVRO_BOOL: key = "bool"; break;
                case AVRO_NULL: key = "null"; break;
                case AVRO_ARRAY: key = "array"; break;
                case AVRO_MAP: key = "map"; break;
                case AVRO_RECORD:
                case AVRO_ENUM:
                case AVRO_UNION:
                case AVRO_FIXED:
                case AVRO_SYMBOLIC:
                    key = branch->name().fullname();
                    break;
                default:
                    return false;
            }

            // set::insert reports through .second whether the key was new; a
            // repeated key means two branches collide.
            if (!seen.insert(key).second) {
                return false;
            }
        }
        return true;
    }

    void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
};
|
||||
|
||||
/// Fixed node: a named node that carries a size attribute.
class AVRO_DECL NodeFixed : public NodeImplFixed {
public:
    NodeFixed() : NodeImplFixed(AVRO_FIXED) {}

    /// Creates a fixed node from its name and size.
    NodeFixed(const HasName &name, const HasSize &size)
        : NodeImplFixed(AVRO_FIXED, name, NoLeaves(), NoLeafNames(), NoAttributes(), size) {}

    SchemaResolution resolve(const Node &reader) const override;

    void printJson(std::ostream &os, size_t depth) const override;

    /// Valid when both the name and the size attribute report one entry.
    bool isValid() const override {
        const bool hasSingleName = nameAttribute_.size() == 1;
        const bool hasSingleSize = sizeAttribute_.size() == 1;
        return hasSingleName && hasSingleSize;
    }

    void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
};
|
||||
|
||||
template<class A, class B, class C, class D, class E>
|
||||
inline void
|
||||
NodeImpl<A, B, C, D, E>::setLeafToSymbolic(size_t index, const NodePtr &node) {
|
||||
if (!B::hasAttribute) {
|
||||
throw Exception("Cannot change leaf node for nonexistent leaf");
|
||||
}
|
||||
|
||||
auto &replaceNode = const_cast<NodePtr &>(leafAttributes_.get(index));
|
||||
if (replaceNode->name() != node->name()) {
|
||||
throw Exception("Symbolic name does not match the name of the schema it references");
|
||||
}
|
||||
|
||||
auto symbol = std::make_shared<NodeSymbolic>();
|
||||
symbol->setName(node->name());
|
||||
symbol->setNode(node);
|
||||
replaceNode = symbol;
|
||||
}
|
||||
|
||||
template<class A, class B, class C, class D, class E>
|
||||
inline void
|
||||
NodeImpl<A, B, C, D, E>::printBasicInfo(std::ostream &os) const {
|
||||
os << type();
|
||||
if (hasName()) {
|
||||
os << ' ' << nameAttribute_.get();
|
||||
}
|
||||
|
||||
if (E::hasAttribute) {
|
||||
os << " " << sizeAttribute_.get();
|
||||
}
|
||||
os << '\n';
|
||||
size_t count = leaves();
|
||||
count = count ? count : names();
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
if (C::hasAttribute) {
|
||||
os << "name " << nameAt(i) << '\n';
|
||||
}
|
||||
if (type() != AVRO_SYMBOLIC && leafAttributes_.hasAttribute) {
|
||||
leafAt(i)->printBasicInfo(os);
|
||||
}
|
||||
}
|
||||
if (isCompound(type())) {
|
||||
os << "end " << type() << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
/// Follows a symbolic node to the node it refers to.
///
/// Throws if node is not of type AVRO_SYMBOLIC; NodeSymbolic::getNode()
/// throws in turn if the symbol has no live target.
inline NodePtr resolveSymbol(const NodePtr &node) {
    const bool isSymbolic = node->type() == AVRO_SYMBOLIC;
    if (!isSymbolic) {
        throw Exception("Only symbolic nodes may be resolved");
    }
    // The type tag was checked above, so the static downcast is taken as
    // safe here.
    const auto symbolic = std::static_pointer_cast<NodeSymbolic>(node);
    return symbolic->getNode();
}
|
||||
|
||||
/// Formats an integer as a "\uXXXX"-style escape sequence.
///
/// The value is written in lowercase hexadecimal, left-padded with '0' to at
/// least four digits, which is the digit count a JSON "\u" escape requires.
/// The padding width used to be sizeof(T), which only produced four digits
/// for 4-byte argument types; output for such arguments is unchanged.
///
/// The argument is first reinterpreted as an unsigned value of T's own
/// width. As a result character types are printed as numbers rather than as
/// raw characters, and negative values keep the digits they would have in
/// T's width. Values that need more than four digits are written in full,
/// not truncated.
template<typename T>
inline std::string intToHex(T i) {
    // Number of bytes by which unsigned long long is wider than T (zero when
    // T is at least as wide), used to mask off sign-extension bits below.
    constexpr auto extraBytes =
        sizeof(T) < sizeof(unsigned long long) ? sizeof(unsigned long long) - sizeof(T) : 0;
    const unsigned long long value =
        static_cast<unsigned long long>(i) & (~0ULL >> (extraBytes * 8));

    std::stringstream stream;
    stream << "\\u"
           << std::setfill('0') << std::setw(4)
           << std::hex << value;
    return stream.str();
}
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,143 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Parser_hh__
|
||||
#define avro_Parser_hh__
|
||||
|
||||
#include "Config.hh"
|
||||
#include "Reader.hh"
|
||||
|
||||
#include <array>
|
||||
|
||||
namespace avro {
|
||||
|
||||
///
|
||||
/// Class that wraps a Reader or ValidatingReader with an interface that uses
|
||||
/// explicit get* names instead of getValue
|
||||
///
|
||||
|
||||
template<class Reader>
|
||||
class Parser : private boost::noncopyable {
|
||||
|
||||
public:
|
||||
// Constructor only works with Writer
|
||||
explicit Parser(const InputBuffer &in) : reader_(in) {}
|
||||
|
||||
/// Constructor only works with ValidatingWriter
|
||||
Parser(const ValidSchema &schema, const InputBuffer &in) : reader_(schema, in) {}
|
||||
|
||||
void readNull() {
|
||||
Null null;
|
||||
reader_.readValue(null);
|
||||
}
|
||||
|
||||
bool readBool() {
|
||||
bool val;
|
||||
reader_.readValue(val);
|
||||
return val;
|
||||
}
|
||||
|
||||
int32_t readInt() {
|
||||
int32_t val;
|
||||
reader_.readValue(val);
|
||||
return val;
|
||||
}
|
||||
|
||||
int64_t readLong() {
|
||||
int64_t val;
|
||||
reader_.readValue(val);
|
||||
return val;
|
||||
}
|
||||
|
||||
float readFloat() {
|
||||
float val;
|
||||
reader_.readValue(val);
|
||||
return val;
|
||||
}
|
||||
|
||||
double readDouble() {
|
||||
double val;
|
||||
reader_.readValue(val);
|
||||
return val;
|
||||
}
|
||||
|
||||
void readString(std::string &val) {
|
||||
reader_.readValue(val);
|
||||
}
|
||||
|
||||
void readBytes(std::vector<uint8_t> &val) {
|
||||
reader_.readBytes(val);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
void readFixed(uint8_t (&val)[N]) {
|
||||
reader_.readFixed(val);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
void readFixed(std::array<uint8_t, N> &val) {
|
||||
reader_.readFixed(val);
|
||||
}
|
||||
|
||||
void readRecord() {
|
||||
reader_.readRecord();
|
||||
}
|
||||
|
||||
void readRecordEnd() {
|
||||
reader_.readRecordEnd();
|
||||
}
|
||||
|
||||
int64_t readArrayBlockSize() {
|
||||
return reader_.readArrayBlockSize();
|
||||
}
|
||||
|
||||
int64_t readUnion() {
|
||||
return reader_.readUnion();
|
||||
}
|
||||
|
||||
int64_t readEnum() {
|
||||
return reader_.readEnum();
|
||||
}
|
||||
|
||||
int64_t readMapBlockSize() {
|
||||
return reader_.readMapBlockSize();
|
||||
}
|
||||
|
||||
private:
|
||||
friend Type nextType(Parser<ValidatingReader> &p);
|
||||
friend bool currentRecordName(Parser<ValidatingReader> &p, std::string &name);
|
||||
friend bool nextFieldName(Parser<ValidatingReader> &p, std::string &name);
|
||||
|
||||
Reader reader_;
|
||||
};
|
||||
|
||||
/// Returns the result of nextType() on the parser's private reader_; declared
/// as a friend of Parser so it can reach that member.
inline Type nextType(Parser<ValidatingReader> &p) {
|
||||
return p.reader_.nextType();
|
||||
}
|
||||
|
||||
/// Forwards to currentRecordName(name) on the parser's private reader_ and
/// returns its result.
/// NOTE(review): presumably \p name is only filled when true is returned —
/// confirm against the validator implementation.
inline bool currentRecordName(Parser<ValidatingReader> &p, std::string &name) {
|
||||
return p.reader_.currentRecordName(name);
|
||||
}
|
||||
|
||||
/// Forwards to nextFieldName(name) on the parser's private reader_ and
/// returns its result.
/// NOTE(review): presumably \p name is only filled when true is returned —
/// confirm against the validator implementation.
inline bool nextFieldName(Parser<ValidatingReader> &p, std::string &name) {
|
||||
return p.reader_.nextFieldName(name);
|
||||
}
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,201 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Reader_hh__
|
||||
#define avro_Reader_hh__
|
||||
|
||||
#include <array>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
#include "Config.hh"
|
||||
#include "Types.hh"
|
||||
#include "Validator.hh"
|
||||
#include "Zigzag.hh"
|
||||
#include "buffer/BufferReader.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
///
|
||||
/// Parses from an avro encoding to the requested type. Assumes the next item
|
||||
/// in the avro binary data is the expected type.
|
||||
///
|
||||
|
||||
template<class ValidatorType>
|
||||
class ReaderImpl : private boost::noncopyable {
|
||||
|
||||
public:
|
||||
explicit ReaderImpl(const InputBuffer &buffer) : reader_(buffer) {}
|
||||
|
||||
ReaderImpl(const ValidSchema &schema, const InputBuffer &buffer) : validator_(schema),
|
||||
reader_(buffer) {}
|
||||
|
||||
void readValue(Null &) {
|
||||
validator_.checkTypeExpected(AVRO_NULL);
|
||||
}
|
||||
|
||||
void readValue(bool &val) {
|
||||
validator_.checkTypeExpected(AVRO_BOOL);
|
||||
uint8_t intVal = 0;
|
||||
reader_.read(intVal);
|
||||
val = (intVal != 0);
|
||||
}
|
||||
|
||||
void readValue(int32_t &val) {
|
||||
validator_.checkTypeExpected(AVRO_INT);
|
||||
auto encoded = static_cast<uint32_t>(readVarInt());
|
||||
val = decodeZigzag32(encoded);
|
||||
}
|
||||
|
||||
void readValue(int64_t &val) {
|
||||
validator_.checkTypeExpected(AVRO_LONG);
|
||||
uint64_t encoded = readVarInt();
|
||||
val = decodeZigzag64(encoded);
|
||||
}
|
||||
|
||||
void readValue(float &val) {
|
||||
validator_.checkTypeExpected(AVRO_FLOAT);
|
||||
union {
|
||||
float f;
|
||||
uint32_t i;
|
||||
} v;
|
||||
reader_.read(v.i);
|
||||
val = v.f;
|
||||
}
|
||||
|
||||
void readValue(double &val) {
|
||||
validator_.checkTypeExpected(AVRO_DOUBLE);
|
||||
union {
|
||||
double d;
|
||||
uint64_t i;
|
||||
} v = {0};
|
||||
reader_.read(v.i);
|
||||
val = v.d;
|
||||
}
|
||||
|
||||
void readValue(std::string &val) {
|
||||
validator_.checkTypeExpected(AVRO_STRING);
|
||||
auto size = static_cast<size_t>(readSize());
|
||||
reader_.read(val, size);
|
||||
}
|
||||
|
||||
void readBytes(std::vector<uint8_t> &val) {
|
||||
validator_.checkTypeExpected(AVRO_BYTES);
|
||||
auto size = static_cast<size_t>(readSize());
|
||||
val.resize(size);
|
||||
reader_.read(reinterpret_cast<char *>(val.data()), size);
|
||||
}
|
||||
|
||||
void readFixed(uint8_t *val, size_t size) {
|
||||
validator_.checkFixedSizeExpected(size);
|
||||
reader_.read(reinterpret_cast<char *>(val), size);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
void readFixed(uint8_t (&val)[N]) {
|
||||
this->readFixed(val, N);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
void readFixed(std::array<uint8_t, N> &val) {
|
||||
this->readFixed(val.data(), N);
|
||||
}
|
||||
|
||||
void readRecord() {
|
||||
validator_.checkTypeExpected(AVRO_RECORD);
|
||||
validator_.checkTypeExpected(AVRO_LONG);
|
||||
validator_.setCount(1);
|
||||
}
|
||||
|
||||
void readRecordEnd() {
|
||||
validator_.checkTypeExpected(AVRO_RECORD);
|
||||
validator_.checkTypeExpected(AVRO_LONG);
|
||||
validator_.setCount(0);
|
||||
}
|
||||
|
||||
int64_t readArrayBlockSize() {
|
||||
validator_.checkTypeExpected(AVRO_ARRAY);
|
||||
return readCount();
|
||||
}
|
||||
|
||||
int64_t readUnion() {
|
||||
validator_.checkTypeExpected(AVRO_UNION);
|
||||
return readCount();
|
||||
}
|
||||
|
||||
int64_t readEnum() {
|
||||
validator_.checkTypeExpected(AVRO_ENUM);
|
||||
return readCount();
|
||||
}
|
||||
|
||||
int64_t readMapBlockSize() {
|
||||
validator_.checkTypeExpected(AVRO_MAP);
|
||||
return readCount();
|
||||
}
|
||||
|
||||
Type nextType() const {
|
||||
return validator_.nextTypeExpected();
|
||||
}
|
||||
|
||||
bool currentRecordName(std::string &name) const {
|
||||
return validator_.getCurrentRecordName(name);
|
||||
}
|
||||
|
||||
bool nextFieldName(std::string &name) const {
|
||||
return validator_.getNextFieldName(name);
|
||||
}
|
||||
|
||||
private:
|
||||
uint64_t readVarInt() {
|
||||
uint64_t encoded = 0;
|
||||
uint8_t val = 0;
|
||||
int shift = 0;
|
||||
do {
|
||||
reader_.read(val);
|
||||
uint64_t newBits = static_cast<uint64_t>(val & 0x7f) << shift;
|
||||
encoded |= newBits;
|
||||
shift += 7;
|
||||
} while (val & 0x80);
|
||||
|
||||
return encoded;
|
||||
}
|
||||
|
||||
size_t readSize() {
|
||||
uint64_t encoded = readVarInt();
|
||||
auto size = static_cast<size_t>(decodeZigzag64(encoded));
|
||||
return size;
|
||||
}
|
||||
|
||||
size_t readCount() {
|
||||
validator_.checkTypeExpected(AVRO_LONG);
|
||||
size_t count = readSize();
|
||||
validator_.setCount(count);
|
||||
return count;
|
||||
}
|
||||
|
||||
ValidatorType validator_;
|
||||
BufferReader reader_;
|
||||
};
|
||||
|
||||
/// ReaderImpl instantiated with NullValidator.
/// NOTE(review): presumably NullValidator performs no checks — confirm in Validator.hh.
using Reader = ReaderImpl<NullValidator>;
|
||||
/// ReaderImpl instantiated with Validator.
using ValidatingReader = ReaderImpl<Validator>;
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Resolver_hh__
|
||||
#define avro_Resolver_hh__
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
#include "Config.hh"
|
||||
#include "Reader.hh"
|
||||
|
||||
/// \file Resolver.hh
|
||||
///
|
||||
|
||||
namespace avro {
|
||||
|
||||
class ValidSchema;
|
||||
class Layout;
|
||||
|
||||
/// Abstract interface with a single pure-virtual parse() operation.
/// Copying is disabled through boost::noncopyable.
class AVRO_DECL Resolver : private boost::noncopyable {
|
||||
public:
|
||||
/// Implemented by subclasses; takes the Reader to pull data from and a raw
/// byte address.
/// NOTE(review): presumably the parsed value is written at \p address using
/// the reader Layout — confirm against the implementations.
virtual void parse(Reader &reader, uint8_t *address) const = 0;
|
||||
/// Virtual so that instances can be destroyed through a Resolver pointer.
virtual ~Resolver() = default;
|
||||
};
|
||||
|
||||
/// Factory declaration (defined elsewhere): returns an owning pointer to a
/// Resolver built from a writer schema, a reader schema and the reader's Layout.
std::unique_ptr<Resolver> constructResolver(
|
||||
const ValidSchema &writerSchema,
|
||||
const ValidSchema &readerSchema,
|
||||
const Layout &readerLayout);
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_ResolverSchema_hh__
|
||||
#define avro_ResolverSchema_hh__
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
#include "Config.hh"
|
||||
#include "Reader.hh"
|
||||
|
||||
/// \file ResolverSchema.hh
|
||||
///
|
||||
|
||||
namespace avro {
|
||||
|
||||
class ValidSchema;
|
||||
class Layout;
|
||||
class Resolver;
|
||||
|
||||
/// Holds a shared Resolver and exposes it only to ResolvingReader (a friend);
/// the class has no public operations besides construction.
class AVRO_DECL ResolverSchema {
|
||||
public:
|
||||
/// Declared here, defined elsewhere; takes the writer schema, the reader
/// schema and the reader's Layout.
ResolverSchema(const ValidSchema &writer, const ValidSchema &reader, const Layout &readerLayout);
|
||||
|
||||
private:
|
||||
// ResolvingReader is the only caller of parse().
friend class ResolvingReader;
|
||||
/// Declared here, defined elsewhere.
/// NOTE(review): presumably delegates to resolver_->parse(reader, address) — confirm.
void parse(Reader &reader, uint8_t *address);
|
||||
// Shared ownership, so copies of a ResolverSchema refer to the same Resolver.
std::shared_ptr<Resolver> resolver_;
|
||||
};
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_ResolvingReader_hh__
|
||||
#define avro_ResolvingReader_hh__
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "Config.hh"
|
||||
#include "Reader.hh"
|
||||
#include "ResolverSchema.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// Pairs a Reader over an input buffer with a copy of a ResolverSchema and
/// parses directly into caller-provided objects. Copying is disabled through
/// boost::noncopyable.
class AVRO_DECL ResolvingReader : private boost::noncopyable {
|
||||
|
||||
public:
|
||||
/// Constructs the reader over \p in and stores a copy of \p schema.
ResolvingReader(const ResolverSchema &schema, const InputBuffer &in) : reader_(in),
|
||||
schema_(schema) {}
|
||||
|
||||
/// Passes the address of \p object, viewed as raw bytes, to the schema's
/// parse() together with the internal reader.
/// NOTE(review): presumably T must match the Layout the ResolverSchema was
/// built with — nothing here checks it.
template<typename T>
|
||||
void parse(T &object) {
|
||||
schema_.parse(reader_, reinterpret_cast<uint8_t *>(&object));
|
||||
}
|
||||
|
||||
private:
|
||||
Reader reader_;
|
||||
ResolverSchema schema_;
|
||||
};
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,147 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Schema_hh__
|
||||
#define avro_Schema_hh__
|
||||
|
||||
#include "Config.hh"
|
||||
#include "CustomAttributes.hh"
|
||||
#include "NodeImpl.hh"
|
||||
#include <string>
|
||||
|
||||
/// \file
|
||||
///
|
||||
/// Schemas for representing all the avro types. The compound schema objects
|
||||
/// allow composition from other schemas.
|
||||
///
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// The root Schema object is a base class. Nobody constructs this class directly.
|
||||
|
||||
/// Base of every schema class in this header: owns the root node (node_) and
/// exposes its type and the node pointer. Constructors are protected, so only
/// derived classes can create one.
class AVRO_DECL Schema {
|
||||
public:
|
||||
virtual ~Schema() = default;
|
||||
|
||||
/// Returns the type reported by the root node.
Type type() const {
|
||||
return node_->type();
|
||||
}
|
||||
|
||||
/// Read-only access to the root node pointer.
const NodePtr &root() const {
|
||||
return node_;
|
||||
}
|
||||
|
||||
/// Mutable access to the root node pointer.
NodePtr &root() {
|
||||
return node_;
|
||||
}
|
||||
|
||||
protected:
|
||||
/// Takes an existing node pointer by value and moves it into node_.
explicit Schema(NodePtr node) : node_(std::move(node)) {}
|
||||
/// Initializes node_ from a raw Node pointer.
explicit Schema(Node *node) : node_(node) {}
|
||||
|
||||
NodePtr node_;
|
||||
};
|
||||
|
||||
/// Schema whose root is a new NodePrimitive tagged AVRO_NULL.
class AVRO_DECL NullSchema : public Schema {
|
||||
public:
|
||||
NullSchema() : Schema(new NodePrimitive(AVRO_NULL)) {}
|
||||
};
|
||||
|
||||
/// Schema whose root is a new NodePrimitive tagged AVRO_BOOL.
class AVRO_DECL BoolSchema : public Schema {
|
||||
public:
|
||||
BoolSchema() : Schema(new NodePrimitive(AVRO_BOOL)) {}
|
||||
};
|
||||
|
||||
/// Schema whose root is a new NodePrimitive tagged AVRO_INT.
class AVRO_DECL IntSchema : public Schema {
|
||||
public:
|
||||
IntSchema() : Schema(new NodePrimitive(AVRO_INT)) {}
|
||||
};
|
||||
|
||||
/// Schema whose root is a new NodePrimitive tagged AVRO_LONG.
class AVRO_DECL LongSchema : public Schema {
|
||||
public:
|
||||
LongSchema() : Schema(new NodePrimitive(AVRO_LONG)) {}
|
||||
};
|
||||
|
||||
/// Schema whose root is a new NodePrimitive tagged AVRO_FLOAT.
class AVRO_DECL FloatSchema : public Schema {
|
||||
public:
|
||||
FloatSchema() : Schema(new NodePrimitive(AVRO_FLOAT)) {}
|
||||
};
|
||||
|
||||
/// Schema whose root is a new NodePrimitive tagged AVRO_DOUBLE.
class AVRO_DECL DoubleSchema : public Schema {
|
||||
public:
|
||||
DoubleSchema() : Schema(new NodePrimitive(AVRO_DOUBLE)) {}
|
||||
};
|
||||
|
||||
/// Schema whose root is a new NodePrimitive tagged AVRO_STRING.
class AVRO_DECL StringSchema : public Schema {
|
||||
public:
|
||||
StringSchema() : Schema(new NodePrimitive(AVRO_STRING)) {}
|
||||
};
|
||||
|
||||
/// Schema whose root is a new NodePrimitive tagged AVRO_BYTES.
class AVRO_DECL BytesSchema : public Schema {
|
||||
public:
|
||||
BytesSchema() : Schema(new NodePrimitive(AVRO_BYTES)) {}
|
||||
};
|
||||
|
||||
/// Schema class for records: constructed from a name, then extended field by
/// field. All members are declared here and defined elsewhere.
class AVRO_DECL RecordSchema : public Schema {
|
||||
public:
|
||||
/// Creates a record schema with the given name.
explicit RecordSchema(const std::string &name);
|
||||
/// Adds a field with the given name and schema.
void addField(const std::string &name, const Schema &fieldSchema);
|
||||
// Add a field with custom attributes
|
||||
void addField(const std::string &name, const Schema &fieldSchema,
|
||||
const CustomAttributes &customAttributes);
|
||||
|
||||
/// Returns the record's doc string (by value).
std::string getDoc() const;
|
||||
/// Sets the record's doc string.
void setDoc(const std::string &);
|
||||
};
|
||||
|
||||
/// Schema class for enums: constructed from a name, then extended symbol by
/// symbol. Members are declared here and defined elsewhere.
class AVRO_DECL EnumSchema : public Schema {
|
||||
public:
|
||||
/// Creates an enum schema with the given name.
explicit EnumSchema(const std::string &name);
|
||||
/// Adds one symbol to the enum.
void addSymbol(const std::string &symbol);
|
||||
};
|
||||
|
||||
/// Schema class for arrays. Members are declared here and defined elsewhere.
class AVRO_DECL ArraySchema : public Schema {
|
||||
public:
|
||||
/// Creates an array schema from the schema of its items.
explicit ArraySchema(const Schema &itemsSchema);
|
||||
/// NOTE(review): this overload has the signature of a copy constructor even
/// though its parameter is named itemsSchema — check the definition to see
/// whether it copies the argument or builds an array of arrays.
ArraySchema(const ArraySchema &itemsSchema);
|
||||
};
|
||||
|
||||
/// Schema class for maps. Members are declared here and defined elsewhere.
class AVRO_DECL MapSchema : public Schema {
|
||||
public:
|
||||
/// Creates a map schema from the schema of its values.
explicit MapSchema(const Schema &valuesSchema);
|
||||
/// NOTE(review): this overload has the signature of a copy constructor even
/// though its parameter is named itemsSchema — check the definition to see
/// whether it copies the argument or builds a map of maps.
MapSchema(const MapSchema &itemsSchema);
|
||||
};
|
||||
|
||||
/// Schema class for unions: default-constructed, then extended one branch at a
/// time. Members are declared here and defined elsewhere.
class AVRO_DECL UnionSchema : public Schema {
|
||||
public:
|
||||
UnionSchema();
|
||||
/// Adds one branch type to the union.
void addType(const Schema &typeSchema);
|
||||
};
|
||||
|
||||
/// Schema class for fixed types. The constructor is declared here and defined
/// elsewhere.
class AVRO_DECL FixedSchema : public Schema {
|
||||
public:
|
||||
/// Takes the size first, then the name.
/// NOTE(review): size is a signed int; presumably it is a byte count that
/// must be non-negative — confirm in the definition.
FixedSchema(int size, const std::string &name);
|
||||
};
|
||||
|
||||
/// Schema class built from a Name and a node pointer to link to. The
/// constructor is declared here and defined elsewhere.
/// NOTE(review): presumably represents a by-name reference to an already
/// defined type — confirm in the definition.
class AVRO_DECL SymbolicSchema : public Schema {
|
||||
public:
|
||||
SymbolicSchema(const Name &name, const NodePtr &link);
|
||||
};
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_SchemaResolution_hh__
|
||||
#define avro_SchemaResolution_hh__
|
||||
|
||||
#include "Config.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// Outcome of comparing a writer schema with a reader schema.
enum SchemaResolution {
|
||||
|
||||
/// The schemas definitely do not match
|
||||
|
||||
RESOLVE_NO_MATCH,
|
||||
|
||||
/// The schemas match at a cursory level
|
||||
///
|
||||
/// For records and enums, this means the name is the same, but it does not
|
||||
/// necessarily mean that every symbol or field is an exact match.
|
||||
|
||||
RESOLVE_MATCH,
|
||||
|
||||
/// For primitives, the matching may occur if the type is promotable. This means that the
|
||||
/// writer matches reader if the writer's type is promoted to the specified type.
|
||||
|
||||
//@{
|
||||
|
||||
RESOLVE_PROMOTABLE_TO_LONG,
|
||||
RESOLVE_PROMOTABLE_TO_FLOAT,
|
||||
RESOLVE_PROMOTABLE_TO_DOUBLE,
|
||||
|
||||
//@}
|
||||
|
||||
};
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,127 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Serializer_hh__
|
||||
#define avro_Serializer_hh__
|
||||
|
||||
#include <array>
|
||||
#include <boost/noncopyable.hpp>
|
||||
|
||||
#include "Config.hh"
|
||||
#include "Writer.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// Class that wraps a Writer or ValidatingWriter with an interface that uses
|
||||
/// explicit write* names instead of writeValue
|
||||
|
||||
template<class Writer>
|
||||
class Serializer : private boost::noncopyable {
|
||||
|
||||
public:
|
||||
/// Constructor only works with Writer
|
||||
explicit Serializer() : writer_() {}
|
||||
|
||||
/// Constructor only works with ValidatingWriter
|
||||
explicit Serializer(const ValidSchema &schema) : writer_(schema) {}
|
||||
|
||||
void writeNull() {
|
||||
writer_.writeValue(Null());
|
||||
}
|
||||
|
||||
void writeBool(bool val) {
|
||||
writer_.writeValue(val);
|
||||
}
|
||||
|
||||
void writeInt(int32_t val) {
|
||||
writer_.writeValue(val);
|
||||
}
|
||||
|
||||
void writeLong(int64_t val) {
|
||||
writer_.writeValue(val);
|
||||
}
|
||||
|
||||
void writeFloat(float val) {
|
||||
writer_.writeValue(val);
|
||||
}
|
||||
|
||||
void writeDouble(double val) {
|
||||
writer_.writeValue(val);
|
||||
}
|
||||
|
||||
void writeBytes(const void *val, size_t size) {
|
||||
writer_.writeBytes(val, size);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
void writeFixed(const uint8_t (&val)[N]) {
|
||||
writer_.writeFixed(val);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
void writeFixed(const std::array<uint8_t, N> &val) {
|
||||
writer_.writeFixed(val);
|
||||
}
|
||||
|
||||
void writeString(const std::string &val) {
|
||||
writer_.writeValue(val);
|
||||
}
|
||||
|
||||
void writeRecord() {
|
||||
writer_.writeRecord();
|
||||
}
|
||||
|
||||
void writeRecordEnd() {
|
||||
writer_.writeRecordEnd();
|
||||
}
|
||||
|
||||
void writeArrayBlock(int64_t size) {
|
||||
writer_.writeArrayBlock(size);
|
||||
}
|
||||
|
||||
void writeArrayEnd() {
|
||||
writer_.writeArrayEnd();
|
||||
}
|
||||
|
||||
void writeMapBlock(int64_t size) {
|
||||
writer_.writeMapBlock(size);
|
||||
}
|
||||
|
||||
void writeMapEnd() {
|
||||
writer_.writeMapEnd();
|
||||
}
|
||||
|
||||
void writeUnion(int64_t choice) {
|
||||
writer_.writeUnion(choice);
|
||||
}
|
||||
|
||||
void writeEnum(int64_t choice) {
|
||||
writer_.writeEnum(choice);
|
||||
}
|
||||
|
||||
InputBuffer buffer() const {
|
||||
return writer_.buffer();
|
||||
}
|
||||
|
||||
private:
|
||||
Writer writer_;
|
||||
};
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,357 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Codec_hh__
|
||||
#define avro_Codec_hh__
|
||||
|
||||
#include "array"
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "boost/blank.hpp"
|
||||
|
||||
#include "AvroTraits.hh"
|
||||
#include "Config.hh"
|
||||
#include "Decoder.hh"
|
||||
#include "Encoder.hh"
|
||||
|
||||
/**
|
||||
* A bunch of templates and specializations for encoding and decoding
|
||||
* specific types.
|
||||
*
|
||||
* Primitive AVRO types BOOLEAN, INT, LONG, FLOAT, DOUBLE, STRING and BYTES
|
||||
* get decoded to and encoded from C++ types bool, int32_t, int64_t, float,
|
||||
* double, std::string and std::vector<uint8_t> respectively. In addition,
|
||||
* std::vector<T> for arbitrary type T gets encoded as an Avro array of T.
|
||||
* Similarly, std::map<std::string, T> for arbitrary type T gets encoded
|
||||
* as an Avro map with value type T.
|
||||
*
|
||||
* Users can have their custom types encoded/decoded by specializing
|
||||
* avro::codec_traits class for their types.
|
||||
*/
|
||||
namespace avro {
|
||||
|
||||
// avro::null is an alias for boost::blank.
using null = boost::blank;
|
||||
|
||||
// Forward declaration so the codec_traits specializations below can call
// avro::encode for nested element types; the definition at the end of this
// header dispatches to codec_traits<T>::encode.
template<typename T>
|
||||
void encode(Encoder &e, const T &t);
|
||||
// Forward declaration so the codec_traits specializations below can call
// avro::decode for nested element types; the definition at the end of this
// header dispatches to codec_traits<T>::decode.
template<typename T>
|
||||
void decode(Decoder &d, T &t);
|
||||
|
||||
/**
|
||||
* Codec_traits tells avro how to encode and decode an object of given type.
|
||||
*
|
||||
* The class is expected to have two static methods:
|
||||
* \li static void encode(Encoder& e, const T& value);
|
||||
* \li static void decode(Decoder& e, T& value);
|
||||
* The primary template is only declared here, never defined; every supported type needs its own specialization.
|
||||
*/
|
||||
template<typename T>
|
||||
struct codec_traits;
|
||||
|
||||
/**
|
||||
* codec_traits for Avro boolean.
|
||||
*/
|
||||
template<>
|
||||
struct codec_traits<bool> {
|
||||
/**
|
||||
* Encodes a given value.
|
||||
*/
|
||||
static void encode(Encoder &e, bool b) {
|
||||
e.encodeBool(b);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes into a given value.
|
||||
*/
|
||||
static void decode(Decoder &d, bool &b) {
|
||||
b = d.decodeBool();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* codec_traits for Avro int.
|
||||
*/
|
||||
template<>
|
||||
struct codec_traits<int32_t> {
|
||||
/**
|
||||
* Encodes a given value.
|
||||
*/
|
||||
static void encode(Encoder &e, int32_t i) {
|
||||
e.encodeInt(i);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes into a given value.
|
||||
*/
|
||||
static void decode(Decoder &d, int32_t &i) {
|
||||
i = d.decodeInt();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* codec_traits for Avro long.
|
||||
*/
|
||||
template<>
|
||||
struct codec_traits<int64_t> {
|
||||
/**
|
||||
* Encodes a given value.
|
||||
*/
|
||||
static void encode(Encoder &e, int64_t l) {
|
||||
e.encodeLong(l);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes into a given value.
|
||||
*/
|
||||
static void decode(Decoder &d, int64_t &l) {
|
||||
l = d.decodeLong();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* codec_traits for Avro float.
|
||||
*/
|
||||
template<>
|
||||
struct codec_traits<float> {
|
||||
/**
|
||||
* Encodes a given value.
|
||||
*/
|
||||
static void encode(Encoder &e, float f) {
|
||||
e.encodeFloat(f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes into a given value.
|
||||
*/
|
||||
static void decode(Decoder &d, float &f) {
|
||||
f = d.decodeFloat();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* codec_traits for Avro double.
|
||||
*/
|
||||
template<>
|
||||
struct codec_traits<double> {
|
||||
/**
|
||||
* Encodes a given value.
|
||||
*/
|
||||
static void encode(Encoder &e, double d) {
|
||||
e.encodeDouble(d);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes into a given value.
|
||||
*/
|
||||
static void decode(Decoder &d, double &dbl) {
|
||||
dbl = d.decodeDouble();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* codec_traits for Avro string.
|
||||
*/
|
||||
template<>
|
||||
struct codec_traits<std::string> {
|
||||
/**
|
||||
* Encodes a given value.
|
||||
*/
|
||||
static void encode(Encoder &e, const std::string &s) {
|
||||
e.encodeString(s);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes into a given value.
|
||||
*/
|
||||
static void decode(Decoder &d, std::string &s) {
|
||||
s = d.decodeString();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* codec_traits for Avro bytes.
|
||||
*/
|
||||
template<>
|
||||
struct codec_traits<std::vector<uint8_t>> {
|
||||
/**
|
||||
* Encodes a given value.
|
||||
*/
|
||||
static void encode(Encoder &e, const std::vector<uint8_t> &b) {
|
||||
e.encodeBytes(b);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes into a given value.
|
||||
*/
|
||||
static void decode(Decoder &d, std::vector<uint8_t> &s) {
|
||||
d.decodeBytes(s);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* codec_traits for Avro fixed.
|
||||
*/
|
||||
template<size_t N>
|
||||
struct codec_traits<std::array<uint8_t, N>> {
|
||||
/**
|
||||
* Encodes a given value.
|
||||
*/
|
||||
static void encode(Encoder &e, const std::array<uint8_t, N> &b) {
|
||||
e.encodeFixed(b.data(), N);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes into a given value.
|
||||
*/
|
||||
static void decode(Decoder &d, std::array<uint8_t, N> &s) {
|
||||
std::vector<uint8_t> v(N);
|
||||
d.decodeFixed(N, v);
|
||||
std::copy(v.data(), v.data() + N, s.data());
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* codec_traits for Avro arrays.
|
||||
*/
|
||||
template<typename T>
|
||||
struct codec_traits<std::vector<T>> {
|
||||
/**
|
||||
* Encodes a given value.
|
||||
*/
|
||||
static void encode(Encoder &e, const std::vector<T> &b) {
|
||||
e.arrayStart();
|
||||
if (!b.empty()) {
|
||||
e.setItemCount(b.size());
|
||||
for (typename std::vector<T>::const_iterator it = b.begin();
|
||||
it != b.end(); ++it) {
|
||||
e.startItem();
|
||||
avro::encode(e, *it);
|
||||
}
|
||||
}
|
||||
e.arrayEnd();
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes into a given value.
|
||||
*/
|
||||
static void decode(Decoder &d, std::vector<T> &s) {
|
||||
s.clear();
|
||||
for (size_t n = d.arrayStart(); n != 0; n = d.arrayNext()) {
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
T t;
|
||||
avro::decode(d, t);
|
||||
s.push_back(std::move(t));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Names the codec_traits instantiation for vector<bool>'s const_reference so
// the specialization below can test it with avro::is_not_defined.
typedef codec_traits<std::vector<bool>::const_reference> bool_codec_traits;
|
||||
|
||||
// When is_not_defined<bool_codec_traits>::value is true, std::conditional
// yields std::vector<bool>::const_reference and this specializes codec_traits
// for that type; otherwise it yields void and this specializes
// codec_traits<void> instead.
// NOTE(review): presumably this exists for standard libraries where
// vector<bool>::const_reference is a proxy class rather than plain bool (where
// codec_traits<bool> already applies) — confirm against AvroTraits.hh.
// Only encode() is provided; there is no decode() for this type.
template<>
|
||||
struct codec_traits<std::conditional<avro::is_not_defined<bool_codec_traits>::value,
|
||||
std::vector<bool>::const_reference, void>::type> {
|
||||
/**
|
||||
* Encodes a given value.
|
||||
*/
|
||||
static void encode(Encoder &e, std::vector<bool>::const_reference b) {
|
||||
e.encodeBool(b);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* codec_traits for Avro maps.
|
||||
*/
|
||||
template<typename T>
|
||||
struct codec_traits<std::map<std::string, T>> {
|
||||
/**
|
||||
* Encodes a given value.
|
||||
*/
|
||||
static void encode(Encoder &e, const std::map<std::string, T> &b) {
|
||||
e.mapStart();
|
||||
if (!b.empty()) {
|
||||
e.setItemCount(b.size());
|
||||
for (typename std::map<std::string, T>::const_iterator
|
||||
it = b.begin();
|
||||
it != b.end(); ++it) {
|
||||
e.startItem();
|
||||
avro::encode(e, it->first);
|
||||
avro::encode(e, it->second);
|
||||
}
|
||||
}
|
||||
e.mapEnd();
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes into a given value.
|
||||
*/
|
||||
static void decode(Decoder &d, std::map<std::string, T> &s) {
|
||||
s.clear();
|
||||
for (size_t n = d.mapStart(); n != 0; n = d.mapNext()) {
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
std::string k;
|
||||
avro::decode(d, k);
|
||||
T &t = s[std::move(k)];
|
||||
avro::decode(d, t);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* codec_traits for Avro null.
|
||||
*/
|
||||
template<>
|
||||
struct codec_traits<avro::null> {
|
||||
/**
|
||||
* Encodes a given value.
|
||||
*/
|
||||
static void encode(Encoder &e, const avro::null &) {
|
||||
e.encodeNull();
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes into a given value.
|
||||
*/
|
||||
static void decode(Decoder &d, avro::null &) {
|
||||
d.decodeNull();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Generic encoder function that makes use of the codec_traits.
* Calls codec_traits<T>::encode(e, t), so T must have a codec_traits
* specialization providing a static encode().
|
||||
*/
|
||||
template<typename T>
|
||||
void encode(Encoder &e, const T &t) {
|
||||
codec_traits<T>::encode(e, t);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic decoder function that makes use of the codec_traits.
* Calls codec_traits<T>::decode(d, t), so T must have a codec_traits
* specialization providing a static decode().
|
||||
*/
|
||||
template<typename T>
|
||||
void decode(Decoder &d, T &t) {
|
||||
codec_traits<T>::decode(d, t);
|
||||
}
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif // avro_Codec_hh__
|
||||
|
|
@ -0,0 +1,477 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Stream_hh__
|
||||
#define avro_Stream_hh__
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "boost/utility.hpp"
|
||||
|
||||
#include "Config.hh"
|
||||
#include "Exception.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
/**
|
||||
* A no-copy input stream.
|
||||
*/
|
||||
class AVRO_DECL InputStream : boost::noncopyable {
|
||||
protected:
|
||||
/**
|
||||
* An empty constructor.
|
||||
*/
|
||||
InputStream() = default;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
virtual ~InputStream() = default;
|
||||
|
||||
/**
|
||||
* Returns some of available data.
|
||||
*
|
||||
* Returns true if some data is available, false if no more data is
|
||||
* available or an error has occurred.
|
||||
*/
|
||||
virtual bool next(const uint8_t **data, size_t *len) = 0;
|
||||
|
||||
/**
|
||||
* "Returns" back some of the data to the stream. The returned
|
||||
* data must be less than what was obtained in the last call to
|
||||
* next().
|
||||
*/
|
||||
virtual void backup(size_t len) = 0;
|
||||
|
||||
/**
|
||||
* Skips number of bytes specified by len.
|
||||
*/
|
||||
virtual void skip(size_t len) = 0;
|
||||
|
||||
/**
|
||||
* Returns the number of bytes read from this stream so far.
|
||||
* All the bytes made available through next are considered
|
||||
* to be used unless, returned back using backup.
|
||||
*/
|
||||
virtual size_t byteCount() const = 0;
|
||||
};
|
||||
|
||||
typedef std::unique_ptr<InputStream> InputStreamPtr;
|
||||
|
||||
/**
|
||||
* An InputStream which also supports seeking to a specific offset.
|
||||
*/
|
||||
class AVRO_DECL SeekableInputStream : public InputStream {
|
||||
protected:
|
||||
/**
|
||||
* An empty constructor.
|
||||
*/
|
||||
SeekableInputStream() = default;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
~SeekableInputStream() override = default;
|
||||
|
||||
/**
|
||||
* Seek to a specific position in the stream. This may invalidate pointers
|
||||
* returned from next(). This will also reset byteCount() to the given
|
||||
* position.
|
||||
*/
|
||||
virtual void seek(int64_t position) = 0;
|
||||
};
|
||||
|
||||
typedef std::unique_ptr<SeekableInputStream> SeekableInputStreamPtr;
|
||||
|
||||
/**
|
||||
* A no-copy output stream.
|
||||
*/
|
||||
class AVRO_DECL OutputStream : boost::noncopyable {
|
||||
protected:
|
||||
/**
|
||||
* An empty constructor.
|
||||
*/
|
||||
OutputStream() = default;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
virtual ~OutputStream() = default;
|
||||
|
||||
/**
|
||||
* Returns a buffer that can be written into.
|
||||
* On successful return, data has the pointer to the buffer
|
||||
* and len has the number of bytes available at data.
|
||||
*/
|
||||
virtual bool next(uint8_t **data, size_t *len) = 0;
|
||||
|
||||
/**
|
||||
* "Returns" back to the stream some of the buffer obtained
|
||||
* from in the last call to next().
|
||||
*/
|
||||
virtual void backup(size_t len) = 0;
|
||||
|
||||
/**
|
||||
* Number of bytes written so far into this stream. The whole buffer
|
||||
* returned by next() is assumed to be written unless some of
|
||||
* it was returned using backup().
|
||||
*/
|
||||
virtual uint64_t byteCount() const = 0;
|
||||
|
||||
/**
|
||||
* Flushes any data remaining in the buffer to the stream's underlying
|
||||
* store, if any.
|
||||
*/
|
||||
virtual void flush() = 0;
|
||||
};
|
||||
|
||||
typedef std::unique_ptr<OutputStream> OutputStreamPtr;
|
||||
|
||||
/**
|
||||
* Returns a new OutputStream, which grows in memory chunks of specified size.
|
||||
*/
|
||||
AVRO_DECL OutputStreamPtr memoryOutputStream(size_t chunkSize = 4 * 1024);
|
||||
|
||||
/**
|
||||
* Returns a new InputStream, with the data from the given byte array.
|
||||
* It does not copy the data, the byte array should remain valid
|
||||
* for as long as the InputStream is in use.
|
||||
*/
|
||||
AVRO_DECL InputStreamPtr memoryInputStream(const uint8_t *data, size_t len);
|
||||
|
||||
/**
|
||||
* Returns a new InputStream with the contents written into an
|
||||
* OutputStream. The output stream must have been returned by
|
||||
* an earlier call to memoryOutputStream(). The contents for the new
|
||||
* InputStream are the snapshot of the output stream. One can construct
|
||||
* any number of memory input stream from a single memory output stream.
|
||||
*/
|
||||
AVRO_DECL InputStreamPtr memoryInputStream(const OutputStream &source);
|
||||
|
||||
/**
|
||||
* Returns the contents written so far into the output stream, which should
|
||||
* be a memory output stream. That is it must have been returned by a previous
|
||||
* call to memoryOutputStream().
|
||||
*/
|
||||
AVRO_DECL std::shared_ptr<std::vector<uint8_t>> snapshot(const OutputStream &source);
|
||||
|
||||
/**
|
||||
* Returns a new OutputStream whose contents would be stored in a file.
|
||||
* Data is written in chunks of given buffer size.
|
||||
*
|
||||
* If there is a file with the given name, it is truncated and overwritten.
|
||||
* If there is no file with the given name, it is created.
|
||||
*/
|
||||
AVRO_DECL OutputStreamPtr fileOutputStream(const char *filename,
|
||||
size_t bufferSize = 8 * 1024);
|
||||
|
||||
/**
|
||||
* Returns a new InputStream whose contents come from the given file.
|
||||
* Data is read in chunks of given buffer size.
|
||||
*/
|
||||
AVRO_DECL InputStreamPtr fileInputStream(
|
||||
const char *filename, size_t bufferSize = 8 * 1024);
|
||||
AVRO_DECL SeekableInputStreamPtr fileSeekableInputStream(
|
||||
const char *filename, size_t bufferSize = 8 * 1024);
|
||||
|
||||
/**
|
||||
* Returns a new OutputStream whose contents will be sent to the given
|
||||
* std::ostream. The std::ostream object should outlive the returned
|
||||
* OutputStream.
|
||||
*/
|
||||
AVRO_DECL OutputStreamPtr ostreamOutputStream(std::ostream &os,
|
||||
size_t bufferSize = 8 * 1024);
|
||||
|
||||
/**
|
||||
* Returns a new InputStream whose contents come from the given
|
||||
* std::istream. The std::istream object should outlive the returned
|
||||
* InputStream.
|
||||
*/
|
||||
AVRO_DECL InputStreamPtr istreamInputStream(
|
||||
std::istream &in, size_t bufferSize = 8 * 1024);
|
||||
|
||||
/**
|
||||
* Returns a new InputStream whose contents come from the given
|
||||
* std::istream. Use this instead of istreamInputStream if
|
||||
* the istream does not support seekg (e.g. compressed streams).
|
||||
* The returned InputStream would read off bytes instead of seeking.
|
||||
* Of course, it has a performance penalty when reading instead of seeking;
|
||||
* So, use this only when seekg does not work.
|
||||
* The std::istream object should outlive the returned
|
||||
* InputStream.
|
||||
*/
|
||||
AVRO_DECL InputStreamPtr nonSeekableIstreamInputStream(
|
||||
std::istream &is, size_t bufferSize = 8 * 1024);
|
||||
|
||||
/** A convenience class for reading from an InputStream */
|
||||
struct StreamReader {
|
||||
/**
|
||||
* The underlying input stream.
|
||||
*/
|
||||
InputStream *in_;
|
||||
|
||||
/**
|
||||
* The next location to read from.
|
||||
*/
|
||||
const uint8_t *next_;
|
||||
|
||||
/**
|
||||
* One past the last valid location.
|
||||
*/
|
||||
const uint8_t *end_;
|
||||
|
||||
/**
|
||||
* Constructs an empty reader.
|
||||
*/
|
||||
StreamReader() : in_(nullptr), next_(nullptr), end_(nullptr) {}
|
||||
|
||||
/**
|
||||
* Constructs a reader with the given underlying stream.
|
||||
*/
|
||||
explicit StreamReader(InputStream &in) : in_(nullptr), next_(nullptr), end_(nullptr) { reset(in); }
|
||||
|
||||
/**
|
||||
* Replaces the current input stream with the given one after backing up
|
||||
* the original one if required.
|
||||
*/
|
||||
void reset(InputStream &is) {
|
||||
if (in_ != nullptr && end_ != next_) {
|
||||
in_->backup(end_ - next_);
|
||||
}
|
||||
in_ = &is;
|
||||
next_ = end_ = nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read just one byte from the underlying stream. If there are no
|
||||
* more data, throws an exception.
|
||||
*/
|
||||
uint8_t read() {
|
||||
if (next_ == end_) {
|
||||
more();
|
||||
}
|
||||
return *next_++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the given number of bytes from the underlying stream.
|
||||
* If there are not that many bytes, throws an exception.
|
||||
*/
|
||||
void readBytes(uint8_t *b, size_t n) {
|
||||
while (n > 0) {
|
||||
if (next_ == end_) {
|
||||
more();
|
||||
}
|
||||
size_t q = end_ - next_;
|
||||
if (q > n) {
|
||||
q = n;
|
||||
}
|
||||
::memcpy(b, next_, q);
|
||||
next_ += q;
|
||||
b += q;
|
||||
n -= q;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Skips the given number of bytes. Of there are not so that many
|
||||
* bytes, throws an exception.
|
||||
*/
|
||||
void skipBytes(size_t n) {
|
||||
if (n > static_cast<size_t>(end_ - next_)) {
|
||||
n -= end_ - next_;
|
||||
next_ = end_;
|
||||
in_->skip(n);
|
||||
} else {
|
||||
next_ += n;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get as many byes from the underlying stream as possible in a single
|
||||
* chunk.
|
||||
* \return true if some data could be obtained. False is no more
|
||||
* data is available on the stream.
|
||||
*/
|
||||
bool fill() {
|
||||
size_t n = 0;
|
||||
while (in_->next(&next_, &n)) {
|
||||
if (n != 0) {
|
||||
end_ = next_ + n;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to get more data and if it cannot, throws an exception.
|
||||
*/
|
||||
void more() {
|
||||
if (!fill()) {
|
||||
throw Exception("EOF reached");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if and only if the end of stream is not reached.
|
||||
*/
|
||||
bool hasMore() {
|
||||
return next_ != end_ || fill();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns unused bytes back to the underlying stream.
|
||||
* If unRead is true the last byte read is also pushed back.
|
||||
*/
|
||||
void drain(bool unRead) {
|
||||
if (unRead) {
|
||||
--next_;
|
||||
}
|
||||
in_->backup(end_ - next_);
|
||||
end_ = next_;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A convenience class to write data into an OutputStream.
|
||||
*/
|
||||
struct StreamWriter {
|
||||
/**
|
||||
* The underlying output stream for this writer.
|
||||
*/
|
||||
OutputStream *out_;
|
||||
|
||||
/**
|
||||
* The next location to write to.
|
||||
*/
|
||||
uint8_t *next_;
|
||||
|
||||
/**
|
||||
* One past the last location one can write to.
|
||||
*/
|
||||
uint8_t *end_;
|
||||
|
||||
/**
|
||||
* Constructs a writer with no underlying stream.
|
||||
*/
|
||||
StreamWriter() : out_(nullptr), next_(nullptr), end_(nullptr) {}
|
||||
|
||||
/**
|
||||
* Constructs a new writer with the given underlying stream.
|
||||
*/
|
||||
explicit StreamWriter(OutputStream &out) : out_(nullptr), next_(nullptr), end_(nullptr) { reset(out); }
|
||||
|
||||
/**
|
||||
* Replaces the current underlying stream with a new one.
|
||||
* If required, it backs up unused bytes in the previous stream.
|
||||
*/
|
||||
void reset(OutputStream &os) {
|
||||
if (out_ != nullptr && end_ != next_) {
|
||||
out_->backup(end_ - next_);
|
||||
}
|
||||
out_ = &os;
|
||||
next_ = end_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a single byte.
|
||||
*/
|
||||
void write(uint8_t c) {
|
||||
if (next_ == end_) {
|
||||
more();
|
||||
}
|
||||
*next_++ = c;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the specified number of bytes starting at \p b.
|
||||
*/
|
||||
void writeBytes(const uint8_t *b, size_t n) {
|
||||
while (n > 0) {
|
||||
if (next_ == end_) {
|
||||
more();
|
||||
}
|
||||
size_t q = end_ - next_;
|
||||
if (q > n) {
|
||||
q = n;
|
||||
}
|
||||
::memcpy(next_, b, q);
|
||||
next_ += q;
|
||||
b += q;
|
||||
n -= q;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* backs up upto the currently written data and flushes the
|
||||
* underlying stream.
|
||||
*/
|
||||
void flush() {
|
||||
if (next_ != end_) {
|
||||
out_->backup(end_ - next_);
|
||||
next_ = end_;
|
||||
}
|
||||
out_->flush();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of bytes written so far. For a meaningful
|
||||
* result, call this after a flush().
|
||||
*/
|
||||
int64_t byteCount() const {
|
||||
return out_->byteCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets more space to write to. Throws an exception it cannot.
|
||||
*/
|
||||
void more() {
|
||||
size_t n = 0;
|
||||
while (out_->next(&next_, &n)) {
|
||||
if (n != 0) {
|
||||
end_ = next_ + n;
|
||||
return;
|
||||
}
|
||||
}
|
||||
throw Exception("EOF reached");
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A convenience function to copy all the contents of an input stream into
|
||||
* an output stream.
|
||||
*/
|
||||
inline void copy(InputStream &in, OutputStream &out) {
|
||||
const uint8_t *p = nullptr;
|
||||
size_t n = 0;
|
||||
StreamWriter w(out);
|
||||
while (in.next(&p, &n)) {
|
||||
w.writeBytes(p, n);
|
||||
}
|
||||
w.flush();
|
||||
}
|
||||
|
||||
} // namespace avro
|
||||
#endif
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Types_hh__
|
||||
#define avro_Types_hh__
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "Config.hh"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
namespace avro {
|
||||
|
||||
/**
 * The "type" for the schema.
 *
 * Primitive types come first and compound types second; the range-based
 * predicates in this header depend on that ordering.
 */
enum Type {

    AVRO_STRING, ///< String
    AVRO_BYTES,  ///< Sequence of variable length bytes data
    AVRO_INT,    ///< 32-bit integer
    AVRO_LONG,   ///< 64-bit integer
    AVRO_FLOAT,  ///< Floating point number
    AVRO_DOUBLE, ///< Double precision floating point number
    AVRO_BOOL,   ///< Boolean value
    AVRO_NULL,   ///< Null

    AVRO_RECORD, ///< Record, a sequence of fields
    AVRO_ENUM,   ///< Enumeration
    AVRO_ARRAY,  ///< Homogeneous array of some specific type
    AVRO_MAP,    ///< Homogeneous map from string to some specific type
    AVRO_UNION,  ///< Union of one or more types
    AVRO_FIXED,  ///< Fixed number of bytes

    AVRO_NUM_TYPES, ///< Marker: number of real Avro types

    // The following are pseudo-types used in the implementation

    AVRO_SYMBOLIC = AVRO_NUM_TYPES, ///< Used internally to avoid circular references.
    AVRO_UNKNOWN = -1               ///< Used internally.
};
|
||||
|
||||
/**
|
||||
* Returns true if and only if the given type is a primitive.
|
||||
* Primitive types are: string, bytes, int, long, float, double, boolean
|
||||
* and null
|
||||
*/
|
||||
inline constexpr bool isPrimitive(Type t) noexcept {
|
||||
return (t >= AVRO_STRING) && (t < AVRO_RECORD);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if and only if the given type is a non primitive valid type.
|
||||
* Primitive types are: string, bytes, int, long, float, double, boolean
|
||||
* and null
|
||||
*/
|
||||
inline constexpr bool isCompound(Type t) noexcept {
|
||||
return (t >= AVRO_RECORD) && (t < AVRO_NUM_TYPES);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if and only if the given type is a valid avro type.
|
||||
*/
|
||||
inline constexpr bool isAvroType(Type t) noexcept {
|
||||
return (t >= AVRO_STRING) && (t < AVRO_NUM_TYPES);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if and only if the given type is within the valid range
|
||||
* of enumeration.
|
||||
*/
|
||||
inline constexpr bool isAvroTypeOrPseudoType(Type t) noexcept {
|
||||
return (t >= AVRO_STRING) && (t <= AVRO_NUM_TYPES);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the given type into a string. Useful for generating messages.
|
||||
*/
|
||||
AVRO_DECL const std::string& toString(Type type) noexcept;
|
||||
|
||||
/**
|
||||
* Writes a string form of the given type into the given ostream.
|
||||
*/
|
||||
AVRO_DECL std::ostream& operator<<(std::ostream& os, avro::Type type);
|
||||
|
||||
/// define a type to represent Avro Null in template functions
|
||||
struct AVRO_DECL Null{};
|
||||
|
||||
/**
|
||||
* Writes schema for null \p null type to \p os.
|
||||
* \param os The ostream to write to.
|
||||
* \param null The value to be written.
|
||||
*/
|
||||
std::ostream& operator<<(std::ostream& os, const Null& null);
|
||||
|
||||
} // namespace avro
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<avro::Type> : fmt::formatter<std::string> {
|
||||
template <typename FormatContext>
|
||||
auto format(avro::Type t, FormatContext& ctx) const {
|
||||
return fmt::formatter<std::string>::format(avro::toString(t), ctx);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_ValidSchema_hh__
|
||||
#define avro_ValidSchema_hh__
|
||||
|
||||
#include "Config.hh"
|
||||
#include "Node.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
class AVRO_DECL Schema;
|
||||
|
||||
/// A ValidSchema is basically a non-mutable Schema that has passed some
|
||||
/// minimum of sanity checks. Once validated, any Schema that is part of
|
||||
/// this ValidSchema is considered locked, and cannot be modified (an attempt
|
||||
/// to modify a locked Schema will throw). Also, as it is validated, any
|
||||
/// recursive duplications of schemas are replaced with symbolic links to the
|
||||
/// original.
|
||||
///
|
||||
/// Once a Schema is converted to a valid schema it can be used in validating
|
||||
/// parsers/serializers, converted to a json schema, etc.
|
||||
///
|
||||
|
||||
class AVRO_DECL ValidSchema {
|
||||
public:
|
||||
explicit ValidSchema(NodePtr root);
|
||||
explicit ValidSchema(const Schema &schema);
|
||||
ValidSchema();
|
||||
|
||||
void setSchema(const Schema &schema);
|
||||
|
||||
const NodePtr &root() const {
|
||||
return root_;
|
||||
}
|
||||
|
||||
void toJson(std::ostream &os) const;
|
||||
std::string toJson(bool prettyPrint = true) const;
|
||||
|
||||
void toFlatList(std::ostream &os) const;
|
||||
|
||||
protected:
|
||||
NodePtr root_;
|
||||
|
||||
private:
|
||||
static std::string compactSchema(const std::string &schema);
|
||||
};
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,146 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Validating_hh__
|
||||
#define avro_Validating_hh__
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "Config.hh"
|
||||
#include "Types.hh"
|
||||
#include "ValidSchema.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// A validator that performs no validation at all. It offers the same
/// member functions as Validator so that either can be used as the
/// validator type of a template, but accepts every type and never throws.
class AVRO_DECL NullValidator : private boost::noncopyable {
public:
    /// The schema is accepted for interface compatibility and ignored.
    explicit NullValidator(const ValidSchema &) {}

    NullValidator() = default;

    /// Counts are ignored.
    void setCount(size_t) {}

    /// Every type is considered expected.
    static bool typeIsExpected(Type) {
        return true;
    }

    /// No schema is tracked, so the next type is always AVRO_UNKNOWN.
    static Type nextTypeExpected() {
        return AVRO_UNKNOWN;
    }

    /// No schema is tracked, so the reported size is always 0.
    static size_t nextSizeExpected() {
        return 0;
    }

    /// Always returns true and leaves the output string untouched.
    static bool getCurrentRecordName(std::string &) {
        return true;
    }

    /// Always returns true and leaves the output string untouched.
    static bool getNextFieldName(std::string &) {
        return true;
    }

    /// Accepts any type without checking.
    void checkTypeExpected(Type) {}

    /// Accepts any fixed size without checking.
    void checkFixedSizeExpected(size_t) {}
};
|
||||
|
||||
/// This class is used by both the ValidatingSerializer and ValidationParser
|
||||
/// objects. It advances the parse tree (containing logic how to advance
|
||||
/// through the various compound types, for example a record must advance
|
||||
/// through all leaf nodes but a union only skips to one), and reports which
|
||||
/// type is next.
|
||||
|
||||
class AVRO_DECL Validator : private boost::noncopyable {
|
||||
public:
|
||||
explicit Validator(ValidSchema schema);
|
||||
|
||||
void setCount(size_t val);
|
||||
|
||||
bool typeIsExpected(Type type) const {
|
||||
return (expectedTypesFlag_ & typeToFlag(type)) != 0;
|
||||
}
|
||||
|
||||
Type nextTypeExpected() const {
|
||||
return nextType_;
|
||||
}
|
||||
|
||||
size_t nextSizeExpected() const;
|
||||
|
||||
bool getCurrentRecordName(std::string &name) const;
|
||||
bool getNextFieldName(std::string &name) const;
|
||||
|
||||
void checkTypeExpected(Type type) {
|
||||
if (!typeIsExpected(type)) {
|
||||
throw Exception("Type {} does not match schema {}", type, nextType_);
|
||||
}
|
||||
advance();
|
||||
}
|
||||
|
||||
void checkFixedSizeExpected(size_t size) {
|
||||
if (nextSizeExpected() != size) {
|
||||
throw Exception("Wrong size for fixed, got {}, expected {}", size, nextSizeExpected());
|
||||
}
|
||||
checkTypeExpected(AVRO_FIXED);
|
||||
}
|
||||
|
||||
private:
|
||||
using flag_t = uint32_t;
|
||||
|
||||
static flag_t typeToFlag(Type type) {
|
||||
flag_t flag = 1u << static_cast<flag_t>(type);
|
||||
return flag;
|
||||
}
|
||||
|
||||
void setupOperation(const NodePtr &node);
|
||||
|
||||
void setWaitingForCount();
|
||||
|
||||
void advance();
|
||||
void doAdvance();
|
||||
|
||||
void enumAdvance();
|
||||
bool countingSetup();
|
||||
void countingAdvance();
|
||||
void unionAdvance();
|
||||
void fixedAdvance();
|
||||
|
||||
void setupFlag(Type type);
|
||||
|
||||
const ValidSchema schema_;
|
||||
|
||||
Type nextType_;
|
||||
flag_t expectedTypesFlag_;
|
||||
bool compoundStarted_;
|
||||
bool waitingForCount_;
|
||||
size_t count_;
|
||||
|
||||
struct CompoundType {
|
||||
explicit CompoundType(NodePtr n) : node(std::move(n)), pos(0) {}
|
||||
NodePtr node; ///< save the node
|
||||
size_t pos; ///< track the leaf position to visit
|
||||
};
|
||||
|
||||
std::vector<CompoundType> compoundStack_;
|
||||
std::vector<size_t> counters_;
|
||||
};
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,182 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Writer_hh__
|
||||
#define avro_Writer_hh__
|
||||
|
||||
#include <array>
|
||||
#include <boost/noncopyable.hpp>
|
||||
|
||||
#include "Config.hh"
|
||||
#include "Types.hh"
|
||||
#include "Validator.hh"
|
||||
#include "Zigzag.hh"
|
||||
#include "buffer/Buffer.hh"
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// Class for writing avro data to a stream.
|
||||
|
||||
template<class ValidatorType>
|
||||
class WriterImpl : private boost::noncopyable {
|
||||
|
||||
public:
|
||||
WriterImpl() = default;
|
||||
|
||||
explicit WriterImpl(const ValidSchema &schema) : validator_(schema) {}
|
||||
|
||||
void writeValue(const Null &) {
|
||||
validator_.checkTypeExpected(AVRO_NULL);
|
||||
}
|
||||
|
||||
void writeValue(bool val) {
|
||||
validator_.checkTypeExpected(AVRO_BOOL);
|
||||
int8_t byte = (val != 0);
|
||||
buffer_.writeTo(byte);
|
||||
}
|
||||
|
||||
void writeValue(int32_t val) {
|
||||
validator_.checkTypeExpected(AVRO_INT);
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
|
||||
std::array<uint8_t, 5> bytes;
|
||||
size_t size = encodeInt32(val, bytes);
|
||||
buffer_.writeTo(reinterpret_cast<const char *>(bytes.data()), size);
|
||||
}
|
||||
|
||||
void writeValue(int64_t val) {
|
||||
validator_.checkTypeExpected(AVRO_LONG);
|
||||
putLong(val);
|
||||
}
|
||||
|
||||
void writeValue(float val) {
|
||||
validator_.checkTypeExpected(AVRO_FLOAT);
|
||||
union {
|
||||
float f;
|
||||
int32_t i;
|
||||
} v;
|
||||
|
||||
v.f = val;
|
||||
buffer_.writeTo(v.i);
|
||||
}
|
||||
|
||||
void writeValue(double val) {
|
||||
validator_.checkTypeExpected(AVRO_DOUBLE);
|
||||
union {
|
||||
double d;
|
||||
int64_t i;
|
||||
} v;
|
||||
|
||||
v.d = val;
|
||||
buffer_.writeTo(v.i);
|
||||
}
|
||||
|
||||
void writeValue(const std::string &val) {
|
||||
validator_.checkTypeExpected(AVRO_STRING);
|
||||
putBytes(val.c_str(), val.size());
|
||||
}
|
||||
|
||||
void writeBytes(const void *val, size_t size) {
|
||||
validator_.checkTypeExpected(AVRO_BYTES);
|
||||
putBytes(val, size);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
void writeFixed(const uint8_t (&val)[N]) {
|
||||
validator_.checkFixedSizeExpected(N);
|
||||
buffer_.writeTo(reinterpret_cast<const char *>(val), N);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
void writeFixed(const std::array<uint8_t, N> &val) {
|
||||
validator_.checkFixedSizeExpected(val.size());
|
||||
buffer_.writeTo(reinterpret_cast<const char *>(val.data()), val.size());
|
||||
}
|
||||
|
||||
void writeRecord() {
|
||||
validator_.checkTypeExpected(AVRO_RECORD);
|
||||
validator_.checkTypeExpected(AVRO_LONG);
|
||||
validator_.setCount(1);
|
||||
}
|
||||
|
||||
void writeRecordEnd() {
|
||||
validator_.checkTypeExpected(AVRO_RECORD);
|
||||
validator_.checkTypeExpected(AVRO_LONG);
|
||||
validator_.setCount(0);
|
||||
}
|
||||
|
||||
void writeArrayBlock(int64_t size) {
|
||||
validator_.checkTypeExpected(AVRO_ARRAY);
|
||||
writeCount(size);
|
||||
}
|
||||
|
||||
void writeArrayEnd() {
|
||||
writeArrayBlock(0);
|
||||
}
|
||||
|
||||
void writeMapBlock(int64_t size) {
|
||||
validator_.checkTypeExpected(AVRO_MAP);
|
||||
writeCount(size);
|
||||
}
|
||||
|
||||
void writeMapEnd() {
|
||||
writeMapBlock(0);
|
||||
}
|
||||
|
||||
void writeUnion(int64_t choice) {
|
||||
validator_.checkTypeExpected(AVRO_UNION);
|
||||
writeCount(choice);
|
||||
}
|
||||
|
||||
void writeEnum(int64_t choice) {
|
||||
validator_.checkTypeExpected(AVRO_ENUM);
|
||||
writeCount(choice);
|
||||
}
|
||||
|
||||
InputBuffer buffer() const {
|
||||
return buffer_;
|
||||
}
|
||||
|
||||
private:
|
||||
void putLong(int64_t val) {
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
|
||||
std::array<uint8_t, 10> bytes;
|
||||
size_t size = encodeInt64(val, bytes);
|
||||
buffer_.writeTo(reinterpret_cast<const char *>(bytes.data()), size);
|
||||
}
|
||||
|
||||
void putBytes(const void *val, size_t size) {
|
||||
putLong(size);
|
||||
buffer_.writeTo(reinterpret_cast<const char *>(val), size);
|
||||
}
|
||||
|
||||
void writeCount(int64_t count) {
|
||||
validator_.checkTypeExpected(AVRO_LONG);
|
||||
validator_.setCount(count);
|
||||
putLong(count);
|
||||
}
|
||||
|
||||
ValidatorType validator_;
|
||||
OutputBuffer buffer_;
|
||||
};
|
||||
|
||||
using Writer = WriterImpl<NullValidator>;
|
||||
using ValidatingWriter = WriterImpl<Validator>;
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Encoding_hh__
|
||||
#define avro_Encoding_hh__
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "Config.hh"
|
||||
/// \file
|
||||
/// Functions for encoding and decoding integers with zigzag compression
|
||||
|
||||
namespace avro {
|
||||
|
||||
/// Maps a signed 64-bit value onto an unsigned one so that values of small
/// magnitude (positive or negative) get small codes: 0, -1, 1, -2, ... become
/// 0, 1, 2, 3, ...
AVRO_DECL constexpr uint64_t encodeZigzag64(int64_t input) noexcept {
    const uint64_t bits = static_cast<uint64_t>(input);
    // All ones when the sign bit is set, zero otherwise.
    const uint64_t signMask = uint64_t{0} - (bits >> 63);
    return (bits << 1) ^ signMask;
}
|
||||
/// Inverse of the 64-bit zigzag mapping: the lowest bit of the code carries the
/// sign and the remaining bits carry the (possibly complemented) value.
AVRO_DECL constexpr int64_t decodeZigzag64(uint64_t input) noexcept {
    const uint64_t magnitude = input >> 1;
    // All ones for odd codes (negative results), zero for even codes.
    const uint64_t signMask = uint64_t{0} - (input & 1U);
    return static_cast<int64_t>(magnitude ^ signMask);
}
|
||||
|
||||
/// 32-bit counterpart of the zigzag mapping: 0, -1, 1, -2, ... become
/// 0, 1, 2, 3, ...
AVRO_DECL constexpr uint32_t encodeZigzag32(int32_t input) noexcept {
    const uint32_t bits = static_cast<uint32_t>(input);
    // All ones when the sign bit is set, zero otherwise.
    const uint32_t signMask = uint32_t{0} - (bits >> 31);
    return (bits << 1) ^ signMask;
}
|
||||
/// Inverse of the 32-bit zigzag mapping. The arithmetic is carried out in
/// 64 bits so the intermediate result always fits before narrowing.
AVRO_DECL constexpr int32_t decodeZigzag32(uint32_t input) noexcept {
    const int64_t magnitude = static_cast<int64_t>(input >> 1);
    // -1 (all ones) for odd codes, 0 for even codes.
    const int64_t signMask = ((input & 1U) != 0) ? int64_t{-1} : int64_t{0};
    return static_cast<int32_t>(magnitude ^ signMask);
}
|
||||
|
||||
/// Encodes a 32-bit signed integer into the caller-supplied 5-byte array.
/// NOTE(review): presumably zigzag followed by a variable-length byte encoding,
/// going by this header's \file description -- confirm against the definition.
/// \param input  value to encode
/// \param output destination array
/// \return a size_t; presumably the number of leading bytes of \p output that
///         were filled in -- confirm against the definition.
AVRO_DECL size_t encodeInt32(int32_t input, std::array<uint8_t, 5> &output) noexcept;
|
||||
/// Encodes a 64-bit signed integer into the caller-supplied 10-byte array.
/// NOTE(review): presumably zigzag followed by a variable-length byte encoding,
/// going by this header's \file description -- confirm against the definition.
/// \param input  value to encode
/// \param output destination array
/// \return a size_t; presumably the number of leading bytes of \p output that
///         were filled in -- confirm against the definition.
AVRO_DECL size_t encodeInt64(int64_t input, std::array<uint8_t, 10> &output) noexcept;
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,492 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_Buffer_hh__
|
||||
#define avro_Buffer_hh__
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <sys/uio.h>
|
||||
#endif
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "../Config.hh"
|
||||
#include "detail/BufferDetail.hh"
|
||||
#include "detail/BufferDetailIterator.hh"
|
||||
|
||||
/**
|
||||
* \file Buffer.hh
|
||||
*
|
||||
* \brief Definitions for InputBuffer and OutputBuffer classes
|
||||
*
|
||||
**/
|
||||
|
||||
namespace avro {
|
||||
|
||||
class OutputBuffer;
|
||||
class InputBuffer;
|
||||
|
||||
/**
|
||||
* The OutputBuffer (write-only buffer)
|
||||
*
|
||||
* Use cases for OutputBuffer
|
||||
*
|
||||
* - write message to buffer using ostream class or directly
|
||||
* - append messages to headers
|
||||
* - building up streams of messages via append
|
||||
* - converting to read-only buffers for sending
|
||||
* - extracting parts of the messages into read-only buffers
|
||||
*
|
||||
* -# ASIO access:
|
||||
* - write to a buffer(s) by asio using iterator
|
||||
* - convert to read buffer for deserializing
|
||||
*
|
||||
* OutputBuffer is assignable and copy-constructible. On copy or assignment,
|
||||
* only a pointer is copied, so the two resulting copies are identical, so
|
||||
* modifying one will modify both.
|
||||
**/
|
||||
|
||||
class AVRO_DECL OutputBuffer {
|
||||
|
||||
public:
|
||||
typedef detail::size_type size_type;
|
||||
typedef detail::data_type data_type;
|
||||
|
||||
/**
|
||||
* The asio library expects a const_iterator (the const-ness refers to the
|
||||
* fact that the underlying avro of buffers will not be modified, even
|
||||
* though the data in those buffers is being modified). The iterator
|
||||
* provides the list of addresses an operation can write to.
|
||||
**/
|
||||
|
||||
typedef detail::OutputBufferIterator const_iterator;
|
||||
|
||||
/**
|
||||
* Default constructor. Will pre-allocate at least the requested size, but
|
||||
* can grow larger on demand.
|
||||
*
|
||||
* Destructor uses the default, which resets a shared pointer, deleting the
|
||||
* underlying data if no other copies of exist.
|
||||
*
|
||||
* Copy and assignment operators are not explicitly provided because the
|
||||
* default ones work fine. The default makes only a shallow copy, so the
|
||||
* copies will refer to the same memory. This is required by asio
|
||||
* functions, which will implicitly make copies for asynchronous
|
||||
* operations. Therefore, the user must be careful that if they create
|
||||
* multiple copies of the same OutputBuffer, only one is being modified
|
||||
* otherwise undefined behavior may occur.
|
||||
*
|
||||
**/
|
||||
|
||||
explicit OutputBuffer(size_type reserveSize = 0) : pimpl_(new detail::BufferImpl) {
|
||||
if (reserveSize) {
|
||||
reserve(reserveSize);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reserve enough space for a wroteTo() operation. When using writeTo(),
|
||||
* the buffer will grow dynamically as needed. But when using the iterator
|
||||
* to write (followed by wroteTo()), data may only be written to the space
|
||||
* available, so this ensures there is enough room in the buffer before
|
||||
* the write operation.
|
||||
**/
|
||||
|
||||
void reserve(size_type reserveSize) {
|
||||
pimpl_->reserveFreeSpace(reserveSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a block of data to the buffer. The buffer size will automatically
|
||||
* grow if the size is larger than what is currently free.
|
||||
**/
|
||||
|
||||
size_type writeTo(const data_type *data, size_type size) {
|
||||
return pimpl_->writeTo(data, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a single value to the buffer. The buffer size will automatically
|
||||
* grow if there is not room for the byte. The value must be a
|
||||
* "fundamental" type, e.g. int, float, etc. (otherwise use the other
|
||||
* writeTo tests).
|
||||
**/
|
||||
|
||||
template<typename T>
|
||||
void writeTo(T val) {
|
||||
pimpl_->writeTo(val, std::is_fundamental<T>());
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the state of the buffer after writing through the iterator
|
||||
* interface. This function exists primarily for the boost:asio which
|
||||
* writes directly to the buffer using its iterator. In this case, the
|
||||
* internal state of the buffer does not reflect that the data was written
|
||||
* This informs the buffer how much data was written.
|
||||
*
|
||||
* The buffer does not automatically resize in this case, the bytes written
|
||||
* cannot exceed the amount of free space. Attempting to write more will
|
||||
* throw a std::length_error exception.
|
||||
**/
|
||||
|
||||
size_type wroteTo(size_type size) {
|
||||
size_type wrote = 0;
|
||||
if (size) {
|
||||
if (size > freeSpace()) {
|
||||
throw std::length_error("Impossible to write more data than free space");
|
||||
}
|
||||
wrote = pimpl_->wroteTo(size);
|
||||
}
|
||||
return wrote;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the buffer have any data?
|
||||
**/
|
||||
|
||||
bool empty() const {
|
||||
return (pimpl_->size() == 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the size of the buffer, in bytes.
|
||||
*/
|
||||
|
||||
size_type size() const {
|
||||
return pimpl_->size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current free space that is available to write to in the
|
||||
* buffer, in bytes. This is not a strict limit in size, as writeTo() can
|
||||
* automatically increase capacity if necessary.
|
||||
**/
|
||||
|
||||
size_type freeSpace() const {
|
||||
return pimpl_->freeSpace();
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends the data in the argument to the end of this buffer. The
|
||||
* argument can be either an InputBuffer or OutputBuffer.
|
||||
*
|
||||
**/
|
||||
|
||||
template<class BufferType>
|
||||
void append(const BufferType &buf) {
|
||||
// don't append an empty buffer
|
||||
if (buf.size()) {
|
||||
pimpl_->append(*(buf.pimpl_.get()));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an iterator pointing to the first data chunk of this buffer
|
||||
* that may be written to.
|
||||
**/
|
||||
|
||||
const_iterator begin() const {
|
||||
return const_iterator(pimpl_->beginWrite());
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the end iterator for writing.
|
||||
**/
|
||||
|
||||
const_iterator end() const {
|
||||
return const_iterator(pimpl_->endWrite());
|
||||
}
|
||||
|
||||
/**
|
||||
* Discard any data in this buffer.
|
||||
**/
|
||||
|
||||
void discardData() {
|
||||
pimpl_->discardData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Discard the specified number of bytes from this data, starting at the beginning.
|
||||
* Throws if the size is greater than the number of bytes.
|
||||
**/
|
||||
|
||||
void discardData(size_t bytes) {
|
||||
if (bytes > 0) {
|
||||
if (bytes < pimpl_->size()) {
|
||||
pimpl_->discardData(bytes);
|
||||
} else if (bytes == pimpl_->size()) {
|
||||
pimpl_->discardData();
|
||||
} else {
|
||||
throw std::out_of_range("trying to discard more data than exists");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove bytes from this buffer, starting from the beginning, and place
|
||||
* them into a new buffer. Throws if the number of requested bytes exceeds
|
||||
* the size of the buffer. Data and freeSpace in the buffer after bytes
|
||||
* remains in this buffer.
|
||||
**/
|
||||
|
||||
InputBuffer extractData(size_type bytes);
|
||||
|
||||
/**
|
||||
* Remove all bytes from this buffer, returning them in a new buffer.
|
||||
* After removing data, some freeSpace may remain in this buffer.
|
||||
**/
|
||||
|
||||
InputBuffer extractData();
|
||||
|
||||
/**
|
||||
* Clone this buffer, creating a copy that contains the same data.
|
||||
**/
|
||||
|
||||
OutputBuffer clone() const {
|
||||
detail::BufferImpl::SharedPtr newImpl(new detail::BufferImpl(*pimpl_));
|
||||
return OutputBuffer(newImpl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add unmanaged data to the buffer. The buffer will not automatically
|
||||
* free the data, but it will call the supplied function when the data is
|
||||
* no longer referenced by the buffer (or copies of the buffer).
|
||||
**/
|
||||
|
||||
void appendForeignData(const data_type *data, size_type size, const detail::free_func &func) {
|
||||
pimpl_->appendForeignData(data, size, func);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of chunks that contain free space.
|
||||
**/
|
||||
|
||||
size_t numChunks() const {
|
||||
return pimpl_->numFreeChunks();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of chunks that contain data
|
||||
**/
|
||||
|
||||
size_t numDataChunks() const {
|
||||
return pimpl_->numDataChunks();
|
||||
}
|
||||
|
||||
private:
|
||||
friend class InputBuffer;
|
||||
friend class BufferReader;
|
||||
|
||||
explicit OutputBuffer(detail::BufferImpl::SharedPtr pimpl) : pimpl_(std::move(pimpl)) {}
|
||||
|
||||
detail::BufferImpl::SharedPtr pimpl_; ///< Must never be null.
|
||||
};
|
||||
|
||||
/**
|
||||
* The InputBuffer (read-only buffer)
|
||||
*
|
||||
* InputBuffer is an immutable buffer that may be constructed from an
|
||||
* OutputBuffer, or several of OutputBuffer's methods. Once the data is
|
||||
* transferred to an InputBuffer it cannot be modified, only read (via
|
||||
* BufferReader, istream, or its iterator).
|
||||
*
|
||||
* Assignments and copies are shallow copies.
|
||||
*
|
||||
* -# ASIO access: - iterate using const_iterator for sending messages
|
||||
*
|
||||
**/
|
||||
|
||||
class AVRO_DECL InputBuffer {
|
||||
|
||||
public:
|
||||
typedef detail::size_type size_type;
|
||||
typedef detail::data_type data_type;
|
||||
|
||||
// needed for asio
|
||||
typedef detail::InputBufferIterator const_iterator;
|
||||
|
||||
/**
|
||||
* Default InputBuffer creates an empty buffer.
|
||||
*
|
||||
* Copy/assignment functions use the default ones. They will do a shallow
|
||||
* copy, and because InputBuffer is immutable, the copies will be
|
||||
* identical.
|
||||
*
|
||||
* Destructor also uses the default, which resets a shared pointer,
|
||||
* deleting the underlying data if no other copies of exist.
|
||||
**/
|
||||
|
||||
InputBuffer() : pimpl_(new detail::BufferImpl) {}
|
||||
|
||||
/**
|
||||
* Construct an InputBuffer that contains the contents of an OutputBuffer.
|
||||
* The two buffers will have the same contents, but this copy will be
|
||||
* immutable, while the the OutputBuffer may still be written to.
|
||||
*
|
||||
* If you wish to move the data from the OutputBuffer to a new InputBuffer
|
||||
* (leaving only free space in the OutputBuffer),
|
||||
* OutputBuffer::extractData() will do this more efficiently.
|
||||
*
|
||||
* Implicit conversion is allowed.
|
||||
**/
|
||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||
InputBuffer(const OutputBuffer &src) : pimpl_(new detail::BufferImpl(*src.pimpl_)) {}
|
||||
|
||||
/**
|
||||
* Does the buffer have any data?
|
||||
**/
|
||||
|
||||
bool empty() const {
|
||||
return (pimpl_->size() == 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the size of the buffer, in bytes.
|
||||
**/
|
||||
|
||||
size_type size() const {
|
||||
return pimpl_->size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an iterator pointing to the first data chunk of this buffer
|
||||
* that contains data.
|
||||
**/
|
||||
|
||||
const_iterator begin() const {
|
||||
return const_iterator(pimpl_->beginRead());
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the end iterator.
|
||||
**/
|
||||
|
||||
const_iterator end() const {
|
||||
return const_iterator(pimpl_->endRead());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of chunks containing data.
|
||||
**/
|
||||
|
||||
size_t numChunks() const {
|
||||
return pimpl_->numDataChunks();
|
||||
}
|
||||
|
||||
private:
|
||||
friend class OutputBuffer; // for append function
|
||||
friend class istreambuf;
|
||||
friend class BufferReader;
|
||||
|
||||
explicit InputBuffer(const detail::BufferImpl::SharedPtr &pimpl) : pimpl_(pimpl) {}
|
||||
|
||||
/**
|
||||
* Class to indicate that a copy of a OutputBuffer to InputBuffer should be
|
||||
* a shallow copy, used to enable reading of the contents of an
|
||||
* OutputBuffer without need to convert it to InputBuffer using a deep
|
||||
* copy. It is private and only used by BufferReader and istreambuf
|
||||
* classes.
|
||||
*
|
||||
* Writing to an OutputBuffer while it is being read may lead to undefined
|
||||
* behavior.
|
||||
**/
|
||||
|
||||
class ShallowCopy {};
|
||||
|
||||
/**
|
||||
* Make a shallow copy of an OutputBuffer in order to read it without
|
||||
* causing conversion overhead.
|
||||
**/
|
||||
InputBuffer(const OutputBuffer &src, const ShallowCopy &) : pimpl_(src.pimpl_) {}
|
||||
|
||||
/**
|
||||
* Make a shallow copy of an InputBuffer. The default copy constructor
|
||||
* already provides shallow copy, this is just provided for generic
|
||||
* algorithms that wish to treat InputBuffer and OutputBuffer in the same
|
||||
* manner.
|
||||
**/
|
||||
|
||||
InputBuffer(const InputBuffer &src, const ShallowCopy &) : pimpl_(src.pimpl_) {}
|
||||
|
||||
detail::BufferImpl::ConstSharedPtr pimpl_; ///< Must never be null.
|
||||
};
|
||||
|
||||
/*
|
||||
* Implementations of some OutputBuffer functions are inlined here
|
||||
* because InputBuffer definition was required before.
|
||||
*/
|
||||
|
||||
/// Moves every data byte of this buffer into a freshly created InputBuffer.
/// An empty buffer yields an empty InputBuffer.
inline InputBuffer OutputBuffer::extractData() {
    detail::BufferImpl::SharedPtr extracted(new detail::BufferImpl);
    const bool hasData = (pimpl_->size() != 0);
    if (hasData) {
        pimpl_->extractData(*extracted);
    }
    return InputBuffer(extracted);
}
|
||||
|
||||
/// Moves the first \p bytes data bytes of this buffer into a freshly created
/// InputBuffer. Throws std::out_of_range when more bytes are requested than
/// the buffer holds; a request for zero bytes yields an empty InputBuffer.
inline InputBuffer OutputBuffer::extractData(size_type bytes) {
    const size_type held = pimpl_->size();
    if (bytes > held) {
        throw std::out_of_range("trying to extract more data than exists");
    }

    detail::BufferImpl::SharedPtr extracted(new detail::BufferImpl);
    if (bytes == 0) {
        return InputBuffer(extracted);
    }

    if (bytes == held) {
        // Taking everything: use the whole-buffer form.
        pimpl_->extractData(*extracted);
    } else {
        pimpl_->extractData(*extracted, bytes);
    }
    return InputBuffer(extracted);
}
|
||||
|
||||
#ifndef _WIN32
|
||||
/**
|
||||
* Create an array of iovec structures from the buffer. This utility is used
|
||||
* to support writev and readv function calls. The caller should ensure the
|
||||
* buffer object is not deleted while using the iovec vector.
|
||||
*
|
||||
* If the BufferType is an InputBuffer, the iovec will point to the data that
|
||||
* already exists in the buffer, for reading.
|
||||
*
|
||||
* If the BufferType is an OutputBuffer, the iovec will point to the free
|
||||
* space, which may be written to. Before writing, the caller should call
|
||||
* OutputBuffer::reserve() to create enough room for the desired write (which
|
||||
* can be verified by calling OutputBuffer::freeSpace()), and after writing,
|
||||
* they MUST call OutputBuffer::wroteTo(), otherwise the buffer will not know
|
||||
* the space is not free anymore.
|
||||
*
|
||||
**/
|
||||
|
||||
/// Fills \p iov with one entry per chunk reported by buf.numChunks(), taking
/// the base address and length of each entry from successive elements of the
/// buffer's iterator range. Any previous contents of \p iov are replaced.
template<class BufferType>
inline void toIovec(BufferType &buf, std::vector<struct iovec> &iov) {
    using MutablePointer = typename BufferType::data_type *;

    iov.resize(buf.numChunks());

    typename BufferType::const_iterator chunk = buf.begin();
    for (struct iovec &entry : iov) {
        // iovec only knows non-const pointers, hence the const_cast.
        entry.iov_base = const_cast<MutablePointer>(chunk->data());
        entry.iov_len = chunk->size();
        ++chunk;
    }
}
|
||||
#endif
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_BufferPrint_hh__
|
||||
#define avro_BufferPrint_hh__
|
||||
|
||||
#include "BufferReader.hh"
|
||||
#include <cctype>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
|
||||
/**
|
||||
* \file BufferPrint.hh
|
||||
*
|
||||
* \brief Convenience functions for printing buffer contents
|
||||
**/
|
||||
|
||||
namespace avro {
|
||||
|
||||
namespace detail {
|
||||
|
||||
/**
|
||||
* \fn hexPrint
|
||||
*
|
||||
* Prints a buffer to a stream in the canonical hex+ASCII format,
|
||||
* the same used by the program 'hexdump -C'
|
||||
*
|
||||
**/
|
||||
|
||||
inline void
|
||||
hexPrint(std::ostream &os, BufferReader &reader) {
|
||||
std::ios_base::fmtflags savedFlags = os.flags();
|
||||
|
||||
char sixteenBytes[16];
|
||||
size_t offset = 0;
|
||||
|
||||
os << std::setfill('0');
|
||||
os << std::hex;
|
||||
|
||||
while (reader.bytesRemaining()) {
|
||||
|
||||
os << std::setw(8) << offset << " ";
|
||||
|
||||
size_t inBuffer = reader.read(sixteenBytes, sizeof(sixteenBytes));
|
||||
offset += inBuffer;
|
||||
|
||||
// traverse 8 bytes or inBuffer, whatever is less
|
||||
size_t cnt = std::min(inBuffer, static_cast<size_t>(8));
|
||||
|
||||
size_t i = 0;
|
||||
for (; i < cnt; ++i) {
|
||||
os << std::setw(2);
|
||||
os << (static_cast<int>(sixteenBytes[i]) & 0xff) << ' ';
|
||||
}
|
||||
for (; i < 8; ++i) {
|
||||
os << " ";
|
||||
}
|
||||
os << ' ';
|
||||
|
||||
// traverse 16 bytes or inBuffer, whatever is less
|
||||
cnt = std::min(inBuffer, static_cast<size_t>(16));
|
||||
|
||||
for (; i < cnt; ++i) {
|
||||
os << std::setw(2);
|
||||
os << (static_cast<int>(sixteenBytes[i]) & 0xff) << ' ';
|
||||
}
|
||||
for (; i < 16; ++i) {
|
||||
os << " ";
|
||||
}
|
||||
os << " |";
|
||||
for (i = 0; i < inBuffer; ++i) {
|
||||
os.put(isprint(sixteenBytes[i] & 0xff) ? sixteenBytes[i] : '.');
|
||||
}
|
||||
os << "|\n";
|
||||
}
|
||||
|
||||
// restore flags
|
||||
os.flags(savedFlags);
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
} // namespace avro
|
||||
|
||||
/// Streams a hex+ASCII dump of the data held by an OutputBuffer.
inline std::ostream &operator<<(std::ostream &os, const avro::OutputBuffer &buffer) {
    avro::BufferReader dumpReader{buffer};
    avro::detail::hexPrint(os, dumpReader);
    return os;
}
|
||||
|
||||
/// Streams a hex+ASCII dump of the data held by an InputBuffer.
inline std::ostream &operator<<(std::ostream &os, const avro::InputBuffer &buffer) {
    avro::BufferReader dumpReader{buffer};
    avro::detail::hexPrint(os, dumpReader);
    return os;
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,273 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_BufferReader_hh__
|
||||
#define avro_BufferReader_hh__
|
||||
|
||||
#include "Buffer.hh"
|
||||
#include <type_traits>
|
||||
|
||||
#ifdef min
|
||||
#undef min
|
||||
#endif
|
||||
/**
|
||||
* \file BufferReader.hh
|
||||
*
|
||||
* \brief Helper class for reading bytes from buffer in a streaming manner,
|
||||
* without the overhead of istreams.
|
||||
*
|
||||
**/
|
||||
|
||||
namespace avro {
|
||||
|
||||
/**
|
||||
* Helper class for reading bytes from buffer without worrying about
|
||||
* chunk boundaries. May read from an InputBuffer or OutputBuffer.
|
||||
*
|
||||
**/
|
||||
class AVRO_DECL BufferReader : private boost::noncopyable {

public:
    typedef detail::data_type data_type;
    typedef detail::size_type size_type;

private:
    /// Unread bytes left in the chunk iter_ currently refers to. Dereferences
    /// iter_, so it must only be called while unread data remains.
    size_type chunkRemaining() const {
        return iter_->dataSize() - chunkPos_;
    }

    /// Consumes \p howMuch bytes of the current chunk and moves on to the
    /// next chunk once the current one has been read completely.
    void incrementChunk(size_type howMuch) {
        bytesRemaining_ -= howMuch;
        chunkPos_ += howMuch;
        if (chunkPos_ == iter_->dataSize()) {
            chunkPos_ = 0;
            ++iter_;
        }
    }

    /// Moves the read position back to the first byte of the buffer.
    void rewind() {
        iter_ = bufferImpl_->beginRead();
        bytesRemaining_ = bytes_;
        chunkPos_ = 0;
    }

    /// Address of the next unread byte. Dereferences iter_, so it must only
    /// be called while unread data remains.
    const data_type *addr() const {
        return iter_->tellReadPos() + chunkPos_;
    }

public:
    /// Creates a reader positioned at the start of \p buf.
    explicit BufferReader(const InputBuffer &buf) : bufferImpl_(buf.pimpl_),
                                                    iter_(bufferImpl_->beginRead()),
                                                    bytes_(bufferImpl_->size()),
                                                    bytesRemaining_(bytes_),
                                                    chunkPos_(0) {}

    /// Creates a reader positioned at the start of \p buf. The implementation
    /// object is shared with the OutputBuffer rather than copied.
    explicit BufferReader(const OutputBuffer &buf) : bufferImpl_(buf.pimpl_),
                                                     iter_(bufferImpl_->beginRead()),
                                                     bytes_(bufferImpl_->size()),
                                                     bytesRemaining_(bytes_),
                                                     chunkPos_(0) {}

    /**
     * How many bytes are still not read from this buffer.
     **/
    size_type bytesRemaining() const {
        return bytesRemaining_;
    }

    /**
     * How many bytes have already been read from this buffer, i.e. the
     * current read position measured from the start.
     **/
    size_type bytesRead() const {
        return bytes_ - bytesRemaining_;
    }

    /**
     * Read a block of data from the buffer into \p data. At most \p size
     * bytes are copied; fewer when less data remains.
     *
     * \return the number of bytes actually copied.
     **/
    size_type read(data_type *data, size_type size) {

        if (size > bytesRemaining_) {
            size = bytesRemaining_;
        }
        size_type sizeToRead = size;

        // Copy chunk by chunk; a request may span several chunks.
        while (sizeToRead) {
            const size_type toRead = std::min(sizeToRead, chunkRemaining());
            memcpy(data, addr(), toRead);
            sizeToRead -= toRead;
            data += toRead;
            incrementChunk(toRead);
        }

        return size;
    }

    /**
     * Read exactly \p size bytes from the buffer into \p str, replacing its
     * previous contents.
     *
     * \return false (leaving \p str and the read position untouched) when
     *         fewer than \p size bytes remain, true otherwise.
     **/
    bool read(std::string &str, size_type size) {
        if (size > bytesRemaining_) {
            return false;
        }

        if (size == 0) {
            // Nothing to copy. Return before consulting the current chunk:
            // when no data remains (empty buffer, or everything consumed)
            // iter_ has moved past the last data chunk and must not be
            // dereferenced.
            str.clear();
            return true;
        }

        if (size <= chunkRemaining()) {
            fastStringRead(str, size);
        } else {
            slowStringRead(str, size);
        }

        return true;
    }

    /**
     * Read a single value from the buffer. The value must be a "fundamental"
     * type, e.g. int, float, etc. (otherwise use one of the other read
     * overloads).
     *
     * \return false when fewer than sizeof(T) bytes remain, true otherwise.
     **/
    template<typename T>
    bool read(T &val) {
        return read(val, std::is_fundamental<T>());
    }

    /**
     * Skips a block of data from the buffer.
     *
     * \return false (without moving) when fewer than \p bytes bytes remain.
     **/
    bool skip(size_type bytes) {
        bool skipped = false;
        if (bytes <= bytesRemaining_) {
            doSkip(bytes);
            skipped = true;
        }
        return skipped;
    }

    /**
     * Seek to an absolute position in the buffer.
     *
     * \return false (without moving) when \p pos lies beyond the end.
     **/
    bool seek(size_type pos) {
        if (pos > bytes_) {
            return false;
        }

        size_type toSkip = pos;
        size_type curPos = bytesRead();
        // if the seek position is ahead, we can use skip to get there
        if (pos >= curPos) {
            toSkip -= curPos;
        }
        // if the seek position is ahead of the start of the chunk we can back up to
        // start of the chunk
        else if (pos >= (curPos - chunkPos_)) {
            curPos -= chunkPos_;
            bytesRemaining_ += chunkPos_;
            chunkPos_ = 0;
            toSkip -= curPos;
        } else {
            // target lies in an earlier chunk: restart from the beginning
            rewind();
        }
        doSkip(toSkip);
        return true;
    }

    /**
     * Looks at the next unread byte without consuming it.
     *
     * \return false (leaving \p val untouched) when no data remains.
     **/
    bool peek(char &val) {
        bool ret = (bytesRemaining_ > 0);
        if (ret) {
            val = *(addr());
        }
        return ret;
    }

    /**
     * Returns the next \p bytes bytes as a new InputBuffer and advances past
     * them. When more bytes are requested than remain, nothing is consumed
     * and an empty InputBuffer is returned.
     **/
    InputBuffer copyData(size_type bytes) {
        if (bytes > bytesRemaining_) {
            // force no copy
            bytes = 0;
        }
        detail::BufferImpl::SharedPtr newImpl(new detail::BufferImpl);
        if (bytes) {
            bufferImpl_->copyData(*newImpl, iter_, chunkPos_, bytes);
            doSkip(bytes);
        }
        return InputBuffer(newImpl);
    }

private:
    /// Advances the read position by \p sizeToSkip bytes. The caller must
    /// have checked that this many bytes remain.
    void doSkip(size_type sizeToSkip) {

        while (sizeToSkip) {
            const size_type toSkip = std::min(sizeToSkip, chunkRemaining());
            sizeToSkip -= toSkip;
            incrementChunk(toSkip);
        }
    }

    /// Reads a fundamental-typed value; selected when
    /// std::is_fundamental<T> holds.
    template<typename T>
    bool read(T &val, const std::true_type &) {
        if (sizeof(T) > bytesRemaining_) {
            return false;
        }

        if (sizeof(T) <= chunkRemaining()) {
            // Copy the bytes instead of dereferencing a casted pointer: the
            // read position is an arbitrary byte offset and is not known to
            // be suitably aligned for T.
            memcpy(&val, addr(), sizeof(T));
            incrementChunk(sizeof(T));
        } else {
            // value straddles a chunk boundary: assemble it piecewise
            read(reinterpret_cast<data_type *>(&val), sizeof(T));
        }
        return true;
    }

    /// An uninstantiable function, selected when the std::is_fundamental
    /// check fails; instantiating it produces a compile-time error.
    template<typename T>
    bool read(T &val, const std::false_type &) {
        static_assert(sizeof(T) == 0, "Not a valid type to read");
        return false;
    }

    /// String read for the case where all requested bytes lie in the current
    /// chunk.
    void fastStringRead(std::string &str, size_type sizeToCopy) {
        str.assign(addr(), sizeToCopy);
        incrementChunk(sizeToCopy);
    }

    /// String read for the case where the requested bytes span several
    /// chunks.
    void slowStringRead(std::string &str, size_type sizeToCopy) {
        str.clear();
        str.reserve(sizeToCopy);
        while (sizeToCopy) {
            const size_type toCopy = std::min(sizeToCopy, chunkRemaining());
            str.append(addr(), toCopy);
            sizeToCopy -= toCopy;
            incrementChunk(toCopy);
        }
    }

    detail::BufferImpl::ConstSharedPtr bufferImpl_;      ///< implementation being read
    detail::BufferImpl::ChunkList::const_iterator iter_; ///< chunk holding the next unread byte
    size_type bytes_;                                    ///< total data size captured at construction
    size_type bytesRemaining_;                           ///< bytes not yet read
    size_type chunkPos_;                                 ///< read offset inside the current chunk
};
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_BufferStream_hh__
|
||||
#define avro_BufferStream_hh__
|
||||
|
||||
#include "BufferStreambuf.hh"
|
||||
|
||||
/**
|
||||
* \file BufferStream.hh
|
||||
*
|
||||
* \brief Custom istream and ostream classes for use with buffers
|
||||
**/
|
||||
|
||||
namespace avro {
|
||||
|
||||
/**
|
||||
*
|
||||
* \brief Custom ostream class for writing to an OutputBuffer
|
||||
*
|
||||
**/
|
||||
|
||||
class AVRO_DECL ostream : public std::ostream {

public:
    /// Creates a stream that writes into a new OutputBuffer owned by its
    /// own stream buffer.
    ostream()
        : std::ostream(&obuf_) {}

    /// Creates a stream that writes into \p buf.
    explicit ostream(OutputBuffer &buf)
        : std::ostream(&obuf_),
          obuf_(buf) {}

    /// Gives access to the OutputBuffer filled by writes to this stream.
    const OutputBuffer &getBuffer() const {
        return obuf_.getBuffer();
    }

protected:
    ostreambuf obuf_; ///< stream buffer backing this stream
};
|
||||
|
||||
/**
|
||||
* \brief Custom istream class for reading from an InputBuffer.
|
||||
*
|
||||
* If the buffer contains binary data, then it is recommended to only use the
|
||||
* read() and readsome() functions--get() or getline() may be confused if the
|
||||
* binary data happens to contain an EOF character.
|
||||
*
|
||||
* For buffers containing text, the full implementation of istream is safe.
|
||||
*
|
||||
**/
|
||||
|
||||
class AVRO_DECL istream : public std::istream {

public:
    /// Creates a stream that reads from \p buf.
    explicit istream(const InputBuffer &buf)
        : std::istream(&ibuf_),
          ibuf_(buf) {}

    /// Creates a stream that reads from an OutputBuffer, via a shallow copy
    /// into an InputBuffer. Writing to the OutputBuffer while this stream is
    /// using it may lead to undefined behavior.
    explicit istream(const OutputBuffer &buf)
        : std::istream(&ibuf_),
          ibuf_(buf) {}

    /// Gives access to the InputBuffer this stream reads from.
    const InputBuffer &getBuffer() const {
        return ibuf_.getBuffer();
    }

protected:
    istreambuf ibuf_; ///< stream buffer backing this stream
};
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,242 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_BufferStreambuf_hh__
|
||||
#define avro_BufferStreambuf_hh__
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "Buffer.hh"
|
||||
|
||||
/** \file BufferStreambuf.hh
|
||||
\brief streambuf implementation for istream and ostream.
|
||||
*/
|
||||
|
||||
#ifdef min
|
||||
#undef min
|
||||
#endif
|
||||
namespace avro {
|
||||
|
||||
/**
|
||||
* \brief Implementation of streambuf for use by the Buffer's ostream.
|
||||
*
|
||||
* This class derives from std::streambuf and implements the virtual functions
|
||||
* needed to operate on OutputBuffer. The override functions are overflow and
|
||||
* xsputn. Typically custom streambufs will also override sync for output,
|
||||
* but we have no need since all writes are immediately stored in the buffer.
|
||||
**/
|
||||
|
||||
class AVRO_DECL ostreambuf : public std::streambuf {
public:
    /// Default constructor, uses a default-constructed OutputBuffer.
    ostreambuf() : std::streambuf(),
                   buffer_() {}

    /// Construct using an existing OutputBuffer (buffer_ is copy-constructed
    /// from \p buffer).
    explicit ostreambuf(OutputBuffer &buffer) : std::streambuf(),
                                                buffer_(buffer) {}

    /// Return the buffer.
    const OutputBuffer &getBuffer() const {
        return buffer_;
    }

protected:
    /// Write a single character to the stream.
    ///
    /// No put area is ever installed, so every single-character write arrives
    /// here. Per the std::streambuf contract, an eof argument means "no
    /// character to append": nothing is written and a non-eof value is
    /// returned to report success. Previously eof was truncated to a 0xFF byte,
    /// appended to the buffer, and then reported as a failure.
    ///
    /// \param c the character to append, or traits_type::eof().
    /// \return \p c on success, or a non-eof value when \p c is eof.
    int_type overflow(int_type c) override {
        if (traits_type::eq_int_type(c, traits_type::eof())) {
            return traits_type::not_eof(c);
        }
        buffer_.writeTo(static_cast<OutputBuffer::data_type>(c));
        return c;
    }

    /// Write a block of characters to the stream.
    ///
    /// \param s pointer to the first character to write.
    /// \param n number of characters to write; non-positive counts write
    ///          nothing (a negative count would otherwise wrap to a huge
    ///          size_t).
    /// \return the value reported by OutputBuffer::writeTo, or 0 if nothing
    ///         was requested.
    std::streamsize xsputn(const char_type *s, std::streamsize n) override {
        if (n <= 0) {
            return 0;
        }
        return buffer_.writeTo(s, static_cast<size_t>(n));
    }

private:
    OutputBuffer buffer_; ///< destination of all writes
};
|
||||
|
||||
/**
|
||||
* \brief Implementation of streambuf for use by the Buffer's istream.
|
||||
*
|
||||
* This class derives from std::streambuf and implements the virtual functions
|
||||
* needed to operate on InputBuffer. The override functions are underflow,
|
||||
* xsgetn, seekoff, seekpos, and showmanyc. This is considered a buffered streambuf,
|
||||
* because it can access a chunk of the InputBuffer at a time, using the
|
||||
* iterator interface. Because the input is already buffered, uflow is not
|
||||
* required. pbackfail is not yet implemented but can be if necessary (the
|
||||
* inherited behavior is to fail, and has yet to be a problem).
|
||||
*
|
||||
**/
|
||||
|
||||
class AVRO_DECL istreambuf : public std::streambuf {
public:
    /// Construct a streambuf reading from \p buffer and position the get area
    /// on its first chunk.
    explicit istreambuf(InputBuffer buffer) : std::streambuf(),
                                              buffer_(std::move(buffer)),
                                              basePos_(0),
                                              iter_(buffer_.begin()) {
        setBuffer();
    }

    /// Construct a streambuf reading from an OutputBuffer, by converting it
    /// to an InputBuffer with InputBuffer::ShallowCopy.
    explicit istreambuf(const OutputBuffer &buffer) : std::streambuf(),
                                                      buffer_(buffer, InputBuffer::ShallowCopy()),
                                                      basePos_(0),
                                                      iter_(buffer_.begin()) {
        setBuffer();
    }

    /// Return the buffer.
    const InputBuffer &getBuffer() const {
        return buffer_;
    }

protected:
    /// The current chunk of data is exhausted, read the next chunk.
    ///
    /// \return the first character of the next chunk, or eof when there are no
    ///         more chunks.
    int_type underflow() override {
        if (iter_ != buffer_.end()) {
            // account for the chunk we are leaving before stepping past it
            basePos_ += (egptr() - eback());
            ++iter_;
        }
        return setBuffer();
    }

    /// Get a block of data from the stream. Overrides default behavior
    /// to ignore eof characters that may reside in the stream.
    ///
    /// \param c   destination for the copied bytes.
    /// \param len maximum number of bytes to copy.
    /// \return the number of bytes actually copied (less than \p len only when
    ///         the buffer runs out of data).
    std::streamsize xsgetn(char_type *c, std::streamsize len) override {
        std::streamsize bytesCopied = 0;

        while (bytesCopied < len) {

            size_t inBuffer = egptr() - gptr();

            if (inBuffer) {
                auto remaining = static_cast<size_t>(len - bytesCopied);
                size_t toCopy = std::min(inBuffer, remaining);
                memcpy(c, gptr(), toCopy);
                c += toCopy;
                bytesCopied += static_cast<std::streamsize>(toCopy);
                // gbump takes an int, so advance in int-sized steps
                while (toCopy > static_cast<size_t>(std::numeric_limits<int>::max())) {
                    gbump(std::numeric_limits<int>::max());
                    toCopy -= static_cast<size_t>(std::numeric_limits<int>::max());
                }
                gbump(static_cast<int>(toCopy));
            }

            if (bytesCopied < len) {
                underflow();
                if (iter_ == buffer_.end()) {
                    break;
                }
            }
        }

        return bytesCopied;
    }

    /// Special seek override to navigate InputBuffer chunks.
    ///
    /// \param off offset relative to \p dir.
    /// \param dir std::ios::beg, std::ios::cur or std::ios::end.
    /// \return the new absolute position, or pos_type(off_type(-1)) when the
    ///         target lies before the start or past the end of the data.
    pos_type seekoff(off_type off, std::ios::seekdir dir, std::ios_base::openmode) override {

        off_type curpos = basePos_ + (gptr() - eback());
        off_type newpos = off;

        if (dir == std::ios::cur) {
            newpos += curpos;
        } else if (dir == std::ios::end) {
            newpos += buffer_.size();
        }

        // A target before the start of the data is invalid. Without this
        // check the setg() call below would place the get pointer in front
        // of the first chunk.
        if (newpos < 0) {
            return pos_type(off_type(-1));
        }

        // short circuit for tell()
        if (newpos == curpos) {
            return curpos;
        }

        off_type endpos = basePos_ + (egptr() - eback());

        // if the position is after our current buffer make
        // sure it's not past the end of the buffer
        if ((newpos > endpos) && (newpos > static_cast<off_type>(buffer_.size()))) {
            return pos_type(off_type(-1));
        }
        // if the new position is before our current iterator
        // reset the iterator to the beginning
        else if (newpos < basePos_) {
            iter_ = buffer_.begin();
            basePos_ = 0;
            setBuffer();
            endpos = (egptr() - eback());
        }

        // now if the new position is after the end of the buffer
        // increase the buffer until it is not
        while (newpos > endpos) {
            istreambuf::underflow();
            endpos = basePos_ + (egptr() - eback());
        }

        setg(eback(), eback() + (newpos - basePos_), egptr());
        return newpos;
    }

    /// Calls seekoff for implementation.
    pos_type seekpos(pos_type pos, std::ios_base::openmode) override {
        return istreambuf::seekoff(pos, std::ios::beg, std::ios_base::openmode(0));
    }

    /// Shows the number of bytes buffered in the current chunk, or next chunk if
    /// current is exhausted.
    std::streamsize showmanyc() override {

        // this function only gets called when the current buffer has been
        // completely read, verify this is the case, and if so, underflow to
        // fetch the next buffer

        if (egptr() - gptr() == 0) {
            istreambuf::underflow();
        }
        return egptr() - gptr();
    }

private:
    /// Setup the streambuf buffer pointers after updating
    /// the value of the iterator. Returns the first character
    /// in the new buffer, or eof if there is no buffer.
    ///
    /// Chunks that hold no data are skipped, because the first character of
    /// an empty chunk cannot be read. They contribute zero bytes, so basePos_
    /// needs no adjustment.
    int_type setBuffer() {
        while (iter_ != buffer_.end() && iter_->size() == 0) {
            ++iter_;
        }

        if (iter_ == buffer_.end()) {
            setg(nullptr, nullptr, nullptr);
            return traits_type::eof();
        }

        // std::streambuf's get area takes non-const pointers
        char *loc = const_cast<char *>(iter_->data());
        setg(loc, loc, loc + iter_->size());
        return std::char_traits<char>::to_int_type(*gptr());
    }

    const InputBuffer buffer_;         ///< the data being read
    off_type basePos_;                 ///< absolute offset of the current chunk's first byte
    InputBuffer::const_iterator iter_; ///< chunk currently mapped into the get area
};
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,515 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_BufferDetail_hh__
|
||||
#define avro_BufferDetail_hh__
|
||||
|
||||
#include <boost/function.hpp>
|
||||
#include <boost/shared_array.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <boost/static_assert.hpp>
|
||||
#include <boost/utility.hpp>
|
||||
#include <utility>
|
||||
#ifdef HAVE_BOOST_ASIO
|
||||
#include <boost/asio/buffer.hpp>
|
||||
#endif
|
||||
#include <cassert>
|
||||
#include <deque>
|
||||
#include <exception>
|
||||
|
||||
/**
|
||||
* \file BufferDetail.hh
|
||||
*
|
||||
* \brief The implementation details for the Buffer class.
|
||||
*
|
||||
**/
|
||||
|
||||
namespace avro {
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Element type stored in buffers, and the type used for byte counts.
using data_type = char;
using size_type = size_t;
#ifdef HAVE_BOOST_ASIO
using ConstAsioBuffer = boost::asio::const_buffer;
using MutableAsioBuffer = boost::asio::mutable_buffer;
#endif

/// The size in bytes for blocks backing buffer chunks: smallest and largest
/// block size, and the size used when none is requested.
constexpr size_type kMinBlockSize = 4096;
constexpr size_type kMaxBlockSize = 16384;
constexpr size_type kDefaultBlockSize = kMinBlockSize;
|
||||
|
||||
/// Nullary callback type, stored by CallOnDestroy and run when it is destroyed.
using free_func = boost::function<void()>;
|
||||
|
||||
/**
|
||||
* Simple class to hold a functor that executes on delete
|
||||
**/
|
||||
class CallOnDestroy {
|
||||
public:
|
||||
explicit CallOnDestroy(free_func func) : func_(std::move(func)) {}
|
||||
~CallOnDestroy() {
|
||||
if (func_) {
|
||||
func_();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
free_func func_;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief A chunk is the building block for buffers.
|
||||
*
|
||||
* A chunk is backed by a memory block, and internally it maintains information
|
||||
* about which area of the block it may use, and the portion of this area that
|
||||
* contains valid data. More than one chunk may share the same underlying
|
||||
* block, but the areas should never overlap. Chunk holds a shared pointer to
|
||||
* an array of bytes so that shared blocks are reference counted.
|
||||
*
|
||||
* When a chunk is copied, the copy shares the same underlying buffer, but the
|
||||
* copy receives its own copies of the start/cursor/end pointers, so each copy
|
||||
* can be manipulated independently. This allows different buffers to share
|
||||
* the same non-overlapping parts of a chunk, or even overlapping parts of a
|
||||
* chunk if the situation arises.
|
||||
*
|
||||
**/
|
||||
|
||||
class Chunk {
|
||||
|
||||
public:
|
||||
/// Default constructor, allocates a new underlying block for this chunk.
|
||||
explicit Chunk(size_type size) : underlyingBlock_(new data_type[size]),
|
||||
readPos_(underlyingBlock_.get()),
|
||||
writePos_(readPos_),
|
||||
endPos_(readPos_ + size) {}
|
||||
|
||||
/// Foreign buffer constructor, uses the supplied data for this chunk, and
|
||||
/// only for reading.
|
||||
Chunk(const data_type *data, size_type size, const free_func &func) : callOnDestroy_(new CallOnDestroy(func)),
|
||||
readPos_(const_cast<data_type *>(data)),
|
||||
writePos_(readPos_ + size),
|
||||
endPos_(writePos_) {}
|
||||
|
||||
private:
|
||||
// reference counted object will call a functor when it's destroyed
|
||||
boost::shared_ptr<CallOnDestroy> callOnDestroy_;
|
||||
|
||||
public:
|
||||
/// Remove readable bytes from the front of the chunk by advancing the
|
||||
/// chunk start position.
|
||||
void truncateFront(size_type howMuch) {
|
||||
readPos_ += howMuch;
|
||||
assert(readPos_ <= writePos_);
|
||||
}
|
||||
|
||||
/// Remove readable bytes from the back of the chunk by moving the
|
||||
/// chunk cursor position.
|
||||
void truncateBack(size_type howMuch) {
|
||||
writePos_ -= howMuch;
|
||||
assert(readPos_ <= writePos_);
|
||||
}
|
||||
|
||||
/// Tell the position the next byte may be written to.
|
||||
data_type *tellWritePos() const {
|
||||
return writePos_;
|
||||
}
|
||||
|
||||
/// Tell the position of the first byte containing valid data.
|
||||
const data_type *tellReadPos() const {
|
||||
return readPos_;
|
||||
}
|
||||
|
||||
/// After a write operation, increment the write position.
|
||||
void incrementCursor(size_type howMuch) {
|
||||
writePos_ += howMuch;
|
||||
assert(writePos_ <= endPos_);
|
||||
}
|
||||
|
||||
/// Tell how many bytes of data were written to this chunk.
|
||||
size_type dataSize() const {
|
||||
return (writePos_ - readPos_);
|
||||
}
|
||||
|
||||
/// Tell how many bytes this chunk has available to write to.
|
||||
size_type freeSize() const {
|
||||
return (endPos_ - writePos_);
|
||||
}
|
||||
|
||||
/// Tell how many bytes of data this chunk can hold (used and free).
|
||||
size_type capacity() const {
|
||||
return (endPos_ - readPos_);
|
||||
}
|
||||
|
||||
private:
|
||||
friend bool operator==(const Chunk &lhs, const Chunk &rhs);
|
||||
friend bool operator!=(const Chunk &lhs, const Chunk &rhs);
|
||||
|
||||
// more than one buffer can share an underlying block, so use SharedPtr
|
||||
boost::shared_array<data_type> underlyingBlock_;
|
||||
|
||||
data_type *readPos_; ///< The first readable byte in the block
|
||||
data_type *writePos_; ///< The end of written data and start of free space
|
||||
data_type *endPos_; ///< Marks the end of the usable block area
|
||||
};
|
||||
|
||||
/**
|
||||
* Compare underlying buffers and return true if they are equal
|
||||
**/
|
||||
/// Two chunks are equal when they share the same underlying block; the
/// read/write positions are not compared.
inline bool operator==(const Chunk &lhs, const Chunk &rhs) {
    const bool sameBlock = (lhs.underlyingBlock_ == rhs.underlyingBlock_);
    return sameBlock;
}
|
||||
|
||||
/**
|
||||
* Compare underlying buffers and return true if they are unequal
|
||||
**/
|
||||
/// Two chunks are unequal when they are backed by different underlying
/// blocks; the read/write positions are not compared.
inline bool operator!=(const Chunk &lhs, const Chunk &rhs) {
    const bool differentBlock = (lhs.underlyingBlock_ != rhs.underlyingBlock_);
    return differentBlock;
}
|
||||
|
||||
/**
|
||||
* \brief Implementation details for Buffer class
|
||||
*
|
||||
* Internally, BufferImpl keeps two lists of chunks, one list consists entirely of
|
||||
* chunks containing data, and one list which contains chunks with free space.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
class BufferImpl : boost::noncopyable {
|
||||
|
||||
/// Add a new chunk to the list of chunks for this buffer, growing the
|
||||
/// buffer by the default block size.
|
||||
void allocChunkChecked(size_type size = kDefaultBlockSize) {
|
||||
writeChunks_.push_back(Chunk(size));
|
||||
freeSpace_ += writeChunks_.back().freeSize();
|
||||
}
|
||||
|
||||
/// Add a new chunk to the list of chunks for this buffer, growing the
|
||||
/// buffer by the requested size, but within the range of a minimum and
|
||||
/// maximum.
|
||||
void allocChunk(size_type size) {
|
||||
if (size < kMinBlockSize) {
|
||||
size = kMinBlockSize;
|
||||
} else if (size > kMaxBlockSize) {
|
||||
size = kMaxBlockSize;
|
||||
}
|
||||
allocChunkChecked(size);
|
||||
}
|
||||
|
||||
/// Update the state of the chunks after a write operation. This function
|
||||
/// ensures the chunk states are consistent with the write.
|
||||
void postWrite(size_type size) {
|
||||
|
||||
// precondition to this function is that the writeChunk_.front()
|
||||
// contains the data that was just written, so make sure writeChunks_
|
||||
// is not empty:
|
||||
|
||||
assert(size <= freeSpace_ && !writeChunks_.empty());
|
||||
|
||||
// This is probably the one tricky part of BufferImpl. The data that
|
||||
// was written now exists in writeChunks_.front(). Now we must make
|
||||
// sure that same data exists in readChunks_.back().
|
||||
//
|
||||
// There are two cases:
|
||||
//
|
||||
// 1. readChunks_.last() and writeChunk_.front() refer to the same
|
||||
// underlying block, in which case they both just need their cursor
|
||||
// updated to reflect the new state.
|
||||
//
|
||||
// 2. readChunk_.last() is not the same block as writeChunks_.front(),
|
||||
// in which case it should be, since the writeChunk.front() contains
|
||||
// the next bit of data that will be appended to readChunks_, and
|
||||
// therefore needs to be copied there so we can proceed with updating
|
||||
// their state.
|
||||
//
|
||||
|
||||
// if readChunks_ is not the same as writeChunks_.front(), make a copy
|
||||
// of it there
|
||||
|
||||
if (readChunks_.empty() || (readChunks_.back() != writeChunks_.front())) {
|
||||
const Chunk &curChunk = writeChunks_.front();
|
||||
readChunks_.push_back(curChunk);
|
||||
|
||||
// Any data that existed in the write chunk previously doesn't
|
||||
// belong to this buffer (otherwise it would have already been
|
||||
// added to the readChunk_ list). Here, adjust the start of the
|
||||
// readChunk to begin after any data already existing in curChunk
|
||||
|
||||
readChunks_.back().truncateFront(curChunk.dataSize());
|
||||
}
|
||||
|
||||
assert(readChunks_.back().freeSize() == writeChunks_.front().freeSize());
|
||||
|
||||
// update the states of both readChunks_ and writeChunks_ to indicate that they are
|
||||
// holding the new data
|
||||
|
||||
readChunks_.back().incrementCursor(size);
|
||||
writeChunks_.front().incrementCursor(size);
|
||||
size_ += size;
|
||||
freeSpace_ -= size;
|
||||
|
||||
// if there is no more free space in writeChunks_, the next write cannot use
|
||||
// it, so dispose of it now
|
||||
|
||||
if (writeChunks_.front().freeSize() == 0) {
|
||||
writeChunks_.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
typedef std::deque<Chunk> ChunkList;
|
||||
typedef boost::shared_ptr<BufferImpl> SharedPtr;
|
||||
typedef boost::shared_ptr<const BufferImpl> ConstSharedPtr;
|
||||
|
||||
/// Default constructor, creates a buffer without any chunks
|
||||
BufferImpl() : freeSpace_(0),
|
||||
size_(0) {}
|
||||
|
||||
/// Copy constructor, gets a copy of all the chunks with data.
|
||||
BufferImpl(const BufferImpl &src) : readChunks_(src.readChunks_),
|
||||
freeSpace_(0),
|
||||
size_(src.size_) {}
|
||||
|
||||
/// Amount of data held in this buffer.
|
||||
size_type size() const {
|
||||
return size_;
|
||||
}
|
||||
|
||||
/// Capacity that may be written before the buffer must allocate more memory.
|
||||
size_type freeSpace() const {
|
||||
return freeSpace_;
|
||||
}
|
||||
|
||||
/// Add enough free chunks to make the reservation size available.
|
||||
/// Actual amount may be more (rounded up to next chunk).
|
||||
void reserveFreeSpace(size_type reserveSize) {
|
||||
while (freeSpace_ < reserveSize) {
|
||||
allocChunk(reserveSize - freeSpace_);
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the chunk avro's begin iterator for reading.
|
||||
ChunkList::const_iterator beginRead() const {
|
||||
return readChunks_.begin();
|
||||
}
|
||||
|
||||
/// Return the chunk avro's end iterator for reading.
|
||||
ChunkList::const_iterator endRead() const {
|
||||
return readChunks_.end();
|
||||
}
|
||||
|
||||
/// Return the chunk avro's begin iterator for writing.
|
||||
ChunkList::const_iterator beginWrite() const {
|
||||
return writeChunks_.begin();
|
||||
}
|
||||
|
||||
/// Return the chunk avro's end iterator for writing.
|
||||
ChunkList::const_iterator endWrite() const {
|
||||
return writeChunks_.end();
|
||||
}
|
||||
|
||||
/// Write a single value to buffer, add a new chunk if necessary.
|
||||
template<typename T>
|
||||
void writeTo(T val, const std::true_type &) {
|
||||
if (freeSpace_ && (sizeof(T) <= writeChunks_.front().freeSize())) {
|
||||
// fast path, there's enough room in the writeable chunk to just
|
||||
// straight out copy it
|
||||
*(reinterpret_cast<T *>(writeChunks_.front().tellWritePos())) = val;
|
||||
postWrite(sizeof(T));
|
||||
} else {
|
||||
// need to fixup chunks first, so use the regular memcpy
|
||||
// writeTo method
|
||||
writeTo(reinterpret_cast<data_type *>(&val), sizeof(T));
|
||||
}
|
||||
}
|
||||
|
||||
/// An uninstantiable function, this is if boost::is_fundamental check fails,
|
||||
/// and will compile-time assert.
|
||||
template<typename T>
|
||||
void writeTo(T /*val*/, const std::false_type &) {
|
||||
BOOST_STATIC_ASSERT(sizeof(T) == 0);
|
||||
}
|
||||
|
||||
/// Write a block of data to the buffer, adding new chunks if necessary.
|
||||
size_type writeTo(const data_type *data, size_type size) {
|
||||
size_type bytesLeft = size;
|
||||
while (bytesLeft) {
|
||||
|
||||
if (freeSpace_ == 0) {
|
||||
allocChunkChecked();
|
||||
}
|
||||
|
||||
Chunk &chunk = writeChunks_.front();
|
||||
size_type toCopy = std::min<size_type>(chunk.freeSize(), bytesLeft);
|
||||
assert(toCopy);
|
||||
memcpy(chunk.tellWritePos(), data, toCopy);
|
||||
postWrite(toCopy);
|
||||
data += toCopy;
|
||||
bytesLeft -= toCopy;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
/// Update internal status of chunks after data is written using iterator.
|
||||
size_type wroteTo(size_type size) {
|
||||
assert(size <= freeSpace_);
|
||||
size_type bytesLeft = size;
|
||||
while (bytesLeft) {
|
||||
|
||||
Chunk &chunk = writeChunks_.front();
|
||||
size_type wrote = std::min<size_type>(chunk.freeSize(), bytesLeft);
|
||||
assert(wrote);
|
||||
postWrite(wrote);
|
||||
bytesLeft -= wrote;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
/// Append the chunks that have data in src to this buffer
|
||||
void append(const BufferImpl &src) {
|
||||
std::copy(src.readChunks_.begin(), src.readChunks_.end(), std::back_inserter(readChunks_));
|
||||
size_ += src.size_;
|
||||
}
|
||||
|
||||
/// Remove all the chunks that contain data from this buffer.
|
||||
void discardData() {
|
||||
readChunks_.clear();
|
||||
size_ = 0;
|
||||
}
|
||||
|
||||
/// Remove the specified amount of data from the chunks, starting at the front.
|
||||
void discardData(size_type bytes) {
|
||||
assert(bytes && bytes <= size_);
|
||||
|
||||
size_type bytesToDiscard = bytes;
|
||||
while (bytesToDiscard) {
|
||||
|
||||
size_t currentSize = readChunks_.front().dataSize();
|
||||
|
||||
// see if entire chunk is discarded
|
||||
if (currentSize <= bytesToDiscard) {
|
||||
readChunks_.pop_front();
|
||||
bytesToDiscard -= currentSize;
|
||||
} else {
|
||||
readChunks_.front().truncateFront(bytesToDiscard);
|
||||
bytesToDiscard = 0;
|
||||
}
|
||||
}
|
||||
|
||||
size_ -= bytes;
|
||||
}
|
||||
|
||||
/// Remove the specified amount of data from the chunks, moving the
|
||||
/// data to dest's chunks
|
||||
void extractData(BufferImpl &dest, size_type bytes) {
|
||||
assert(bytes && bytes <= size_);
|
||||
|
||||
size_type bytesToExtract = bytes;
|
||||
while (bytesToExtract) {
|
||||
|
||||
size_t currentSize = readChunks_.front().dataSize();
|
||||
dest.readChunks_.push_back(readChunks_.front());
|
||||
|
||||
// see if entire chunk was extracted
|
||||
if (currentSize <= bytesToExtract) {
|
||||
readChunks_.pop_front();
|
||||
bytesToExtract -= currentSize;
|
||||
} else {
|
||||
readChunks_.front().truncateFront(bytesToExtract);
|
||||
size_t excess = currentSize - bytesToExtract;
|
||||
dest.readChunks_.back().truncateBack(excess);
|
||||
bytesToExtract = 0;
|
||||
}
|
||||
}
|
||||
|
||||
size_ -= bytes;
|
||||
dest.size_ += bytes;
|
||||
}
|
||||
|
||||
/// Move data from this to the destination, leaving this buffer without data
|
||||
void extractData(BufferImpl &dest) {
|
||||
assert(dest.readChunks_.empty());
|
||||
dest.readChunks_.swap(readChunks_);
|
||||
dest.size_ = size_;
|
||||
size_ = 0;
|
||||
}
|
||||
|
||||
/// Copy data to a different buffer by copying the chunks. It's
|
||||
/// a bit like extract, but without modifying the source buffer.
|
||||
static void copyData(BufferImpl &dest,
|
||||
ChunkList::const_iterator iter,
|
||||
size_type offset,
|
||||
size_type bytes) {
|
||||
// now we are positioned to start the copying, copy as many
|
||||
// chunks as we need, the first chunk may have a non-zero offset
|
||||
// if the data to copy is not at the start of the chunk
|
||||
size_type copied = 0;
|
||||
while (copied < bytes) {
|
||||
|
||||
dest.readChunks_.push_back(*iter);
|
||||
|
||||
// offset only applies in the first chunk,
|
||||
// all subsequent chunks are copied from the start
|
||||
dest.readChunks_.back().truncateFront(offset);
|
||||
offset = 0;
|
||||
|
||||
copied += dest.readChunks_.back().dataSize();
|
||||
++iter;
|
||||
}
|
||||
|
||||
// if the last chunk copied has more bytes than we need, truncate it
|
||||
size_type excess = copied - bytes;
|
||||
dest.readChunks_.back().truncateBack(excess);
|
||||
|
||||
dest.size_ += bytes;
|
||||
}
|
||||
|
||||
/// The number of chunks containing data. Used for debugging.
|
||||
size_t numDataChunks() const {
|
||||
return readChunks_.size();
|
||||
}
|
||||
|
||||
/// The number of chunks containing free space (note that an entire chunk
|
||||
/// may not be free). Used for debugging.
|
||||
size_t numFreeChunks() const {
|
||||
return writeChunks_.size();
|
||||
}
|
||||
|
||||
/// Add unmanaged data to the buffer. The buffer will not automatically
|
||||
/// free the data, but it will call the supplied function when the data is
|
||||
/// no longer referenced by the buffer (or copies of the buffer).
|
||||
void appendForeignData(const data_type *data, size_type size, const free_func &func) {
|
||||
readChunks_.push_back(Chunk(data, size, func));
|
||||
size_ += size;
|
||||
}
|
||||
BufferImpl &operator=(const BufferImpl &src) = delete;
|
||||
|
||||
private:
|
||||
ChunkList readChunks_; ///< chunks of this buffer containing data
|
||||
ChunkList writeChunks_; ///< chunks of this buffer containing free space
|
||||
|
||||
size_type freeSpace_; ///< capacity of buffer before allocation required
|
||||
size_type size_; ///< amount of data in buffer
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
211
src/third_party/avro-cpp/dist/include/avro/buffer/detail/BufferDetailIterator.hh
vendored
Normal file
211
src/third_party/avro-cpp/dist/include/avro/buffer/detail/BufferDetailIterator.hh
vendored
Normal file
|
|
@ -0,0 +1,211 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef avro_BufferDetailIterator_hh__
|
||||
#define avro_BufferDetailIterator_hh__
|
||||
|
||||
#include "BufferDetail.hh"
|
||||
|
||||
/**
|
||||
* \file BufferDetailIterator.hh
|
||||
*
|
||||
* \brief The implementation details for the Buffer iterators.
|
||||
**/
|
||||
|
||||
namespace avro {
|
||||
|
||||
namespace detail {
|
||||
|
||||
/**
|
||||
* \brief Implements conversion from a chunk to asio::const_buffer
|
||||
*
|
||||
* Iterators for an InputBuffer will iterate over the list of chunks, so
|
||||
* internally they contain an iterator. But the iterator needs to be
|
||||
* convertible to an asio buffer for use in boost::asio functions. This class
|
||||
* wraps the iterator with a cast operator to do this conversion.
|
||||
**/
|
||||
|
||||
struct InputIteratorHelper {
|
||||
/// Construct a helper with an unnassigned iterator.
|
||||
InputIteratorHelper() : iter_() {}
|
||||
|
||||
/// Construct a helper with an iterator.
|
||||
explicit InputIteratorHelper(const BufferImpl::ChunkList::const_iterator &iter) : iter_(iter) {}
|
||||
|
||||
/// The location of valid data in this chunk.
|
||||
const data_type *data() const {
|
||||
return iter_->tellReadPos();
|
||||
}
|
||||
|
||||
/// The size of valid data in this chunk.
|
||||
size_type size() const {
|
||||
return iter_->dataSize();
|
||||
}
|
||||
|
||||
/// Conversion operator. It doesn't check for null, because the only
|
||||
/// the only time the chunk should be null is when it's the iterator
|
||||
/// end(), which should never be dereferenced anyway.
|
||||
#ifdef HAVE_BOOST_ASIO
|
||||
operator ConstAsioBuffer() const {
|
||||
return ConstAsioBuffer(data(), size());
|
||||
}
|
||||
#endif
|
||||
|
||||
BufferImpl::ChunkList::const_iterator iter_; ///< the current iterator
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Implements conversion from a chunk to asio::buffer
|
||||
*
|
||||
* Iterators for an OutputBuffer will iterate over the list of chunks, so
|
||||
* internally they contain an iterator. But the iterator needs to be
|
||||
* convertible to an asio buffer for use in boost::asio functions. This class
|
||||
* wraps the iterator with a cast operator to do this conversion.
|
||||
*/
|
||||
|
||||
struct OutputIteratorHelper {
|
||||
/// Construct a helper with an unnassigned iterator.
|
||||
OutputIteratorHelper() : iter_() {}
|
||||
|
||||
/// Construct a helper with an iterator.
|
||||
explicit OutputIteratorHelper(const BufferImpl::ChunkList::const_iterator &iter) : iter_(iter) {}
|
||||
|
||||
/// The location of the first writable byte in this chunk.
|
||||
data_type *data() const {
|
||||
return iter_->tellWritePos();
|
||||
}
|
||||
|
||||
/// The size of area that can be written in this chunk.
|
||||
size_type size() const {
|
||||
return iter_->freeSize();
|
||||
}
|
||||
|
||||
/// Conversion operator. It doesn't check for null, because the only
|
||||
/// the only time the chunk should be null is when it's the iterator
|
||||
/// end(), which should never be dereferenced anyway.
|
||||
#ifdef HAVE_BOOST_ASIO
|
||||
operator MutableAsioBuffer() const {
|
||||
return MutableAsioBuffer(data(), size());
|
||||
}
|
||||
#endif
|
||||
|
||||
BufferImpl::ChunkList::const_iterator iter_; ///< the current iterator
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Implements the iterator for Buffer, that iterates through the
|
||||
* buffer's chunks.
|
||||
**/
|
||||
|
||||
template<typename Helper>
|
||||
class BufferIterator {
|
||||
|
||||
public:
|
||||
typedef BufferIterator<Helper> this_type;
|
||||
|
||||
/**
|
||||
* @name Typedefs
|
||||
*
|
||||
* STL iterators define the following declarations. According to
|
||||
* boost::asio documentation, the library expects the iterator to be
|
||||
* bidirectional, however this implements only the forward iterator type.
|
||||
* So far this has not created any problems with asio, but may change if
|
||||
* future versions of the asio require it.
|
||||
**/
|
||||
|
||||
//@{
|
||||
typedef std::forward_iterator_tag iterator_category; // this is a lie to appease asio
|
||||
typedef Helper value_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef value_type *pointer;
|
||||
typedef value_type &reference;
|
||||
//@}
|
||||
|
||||
/// Construct an unitialized iterator.
|
||||
BufferIterator() : helper_() {}
|
||||
|
||||
/* The default implementations are good here
|
||||
/// Copy constructor.
|
||||
BufferIterator(const BufferIterator &src) :
|
||||
helper_(src.helper_)
|
||||
{ }
|
||||
/// Assignment.
|
||||
this_type& operator= (const this_type &rhs) {
|
||||
helper_ = rhs.helper_;
|
||||
return *this;
|
||||
}
|
||||
*/
|
||||
|
||||
/// Construct iterator at the position in the buffer's chunk list.
|
||||
explicit BufferIterator(BufferImpl::ChunkList::const_iterator iter) : helper_(iter) {}
|
||||
|
||||
/// Dereference iterator, returns InputIteratorHelper or OutputIteratorHelper wrapper.
|
||||
reference operator*() {
|
||||
return helper_;
|
||||
}
|
||||
|
||||
/// Dereference iterator, returns const InputIteratorHelper or OutputIteratorHelper wrapper.
|
||||
const value_type &operator*() const {
|
||||
return helper_;
|
||||
}
|
||||
|
||||
/// Dereference iterator, returns InputIteratorHelper or OutputIteratorHelper wrapper.
|
||||
pointer operator->() {
|
||||
return &helper_;
|
||||
}
|
||||
|
||||
/// Dereference iterator, returns const InputIteratorHelper or OutputIteratorHelper wrapper.
|
||||
const value_type *operator->() const {
|
||||
return &helper_;
|
||||
}
|
||||
|
||||
/// Increment to next chunk in list, or to end() iterator.
|
||||
this_type &operator++() {
|
||||
++helper_.iter_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Increment to next chunk in list, or to end() iterator.
|
||||
this_type operator++(int) {
|
||||
this_type ret = *this;
|
||||
++helper_.iter_;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/// True if iterators point to same chunks.
|
||||
bool operator==(const this_type &rhs) const {
|
||||
return (helper_.iter_ == rhs.helper_.iter_);
|
||||
}
|
||||
|
||||
/// True if iterators point to different chunks.
|
||||
bool operator!=(const this_type &rhs) const {
|
||||
return (helper_.iter_ != rhs.helper_.iter_);
|
||||
}
|
||||
|
||||
private:
|
||||
Helper helper_;
|
||||
};
|
||||
|
||||
/// Chunk iterator whose elements are InputIteratorHelper wrappers.
using InputBufferIterator = BufferIterator<InputIteratorHelper>;

/// Chunk iterator whose elements are OutputIteratorHelper wrappers.
using OutputBufferIterator = BufferIterator<OutputIteratorHelper>;
|
||||
|
||||
} // namespace detail
|
||||
|
||||
} // namespace avro
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
#!/bin/bash
# This script downloads and imports Apache Avro C++.
#
# It shallow-clones branch $BRANCH of $AVRO_GIT_URL into a temporary
# directory and copies the selected lang/c++ entries into
# src/third_party/$NAME/dist of the enclosing git work tree. The destination
# directory must not exist yet; the script refuses to overwrite it.

set -euo pipefail
IFS=$'\n\t'

set -vx

NAME="avro-cpp"
# Informational only: nothing below reads VERSION, the clone is driven by BRANCH.
VERSION="1.12.0"
BRANCH="branch-1.12"

AVRO_GIT_URL="https://github.com/mongodb-forks/avro.git"

AVRO_GIT_DIR=$(mktemp -d /tmp/import-avro.XXXXXX)
# Single quotes defer expansion until the trap fires; the inner double quotes
# keep the path a single argument to rm.
trap 'rm -rf "$AVRO_GIT_DIR"' EXIT

DEST_DIR="$(git rev-parse --show-toplevel)/src/third_party/$NAME/dist"

if [[ -d "$DEST_DIR" ]]; then
    echo "You must remove '$DEST_DIR' before running $0" >&2
    exit 1
fi

git clone --depth 1 --branch "$BRANCH" "$AVRO_GIT_URL" "$AVRO_GIT_DIR"

mkdir -p "$DEST_DIR"

# Copy only the C++ implementation
SELECTED=(
    lang/c++/impl
    lang/c++/include
    lang/c++/AUTHORS
    lang/c++/LICENSE
    lang/c++/NOTICE
    lang/c++/README
    lang/c++/NEWS
    lang/c++/ChangeLog
)

for item in "${SELECTED[@]}"; do
    if [[ -e "$AVRO_GIT_DIR/$item" ]]; then
        cp -r "$AVRO_GIT_DIR/$item" "$DEST_DIR/"
    else
        # Keep going (best effort), but make the gap visible instead of
        # reporting success with files silently missing.
        echo "warning: '$item' not found in $AVRO_GIT_URL ($BRANCH); skipping" >&2
    fi
done

# Rename c++ specific files to root level.
# The cp above already places each entry directly under DEST_DIR (by its
# basename), so this only matters if a lang/c++ directory ever ends up there.
if [[ -d "$DEST_DIR/lang/c++" ]]; then
    mv "$DEST_DIR/lang/c++"/* "$DEST_DIR/"
    rm -rf "$DEST_DIR/lang"
fi

# Remove unnecessary files
rm -rf "$DEST_DIR/test" || true
rm -rf "$DEST_DIR/examples" || true
rm -rf "$DEST_DIR/cmake" || true
rm -rf "$DEST_DIR/config" || true
rm -rf "$DEST_DIR/jsonschemas" || true
rm -f "$DEST_DIR/CMakeLists.txt" || true
rm -f "$DEST_DIR/build.sh" || true
rm -f "$DEST_DIR/Doxyfile" || true
rm -f "$DEST_DIR/FindSnappy.cmake" || true
rm -f "$DEST_DIR/MainPage.dox" || true
rm -f "$DEST_DIR/MSBUILD.md" || true
rm -f "$DEST_DIR/.clang-format" || true
rm -f "$DEST_DIR/.gitignore" || true

echo "Avro C++ import completed successfully"
|
||||
|
|
@ -0,0 +1,209 @@
|
|||
#include "mongo/unittest/unittest.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "avro/Compiler.hh"
|
||||
#include "avro/Decoder.hh"
|
||||
#include "avro/Encoder.hh"
|
||||
#include "avro/Exception.hh"
|
||||
#include "avro/Generic.hh"
|
||||
#include "avro/GenericDatum.hh"
|
||||
#include "avro/Stream.hh"
|
||||
#include "avro/Types.hh"
|
||||
#include "avro/ValidSchema.hh"
|
||||
#include "avro/Zigzag.hh"
|
||||
|
||||
// Compiles a minimal two-field record schema and checks that the root node of
// the resulting schema is a record.
TEST(AvroTests, BasicSchemaCompilation) {
    const std::string recordJson = R"({
        "type": "record",
        "name": "Test",
        "fields": [
            {"name": "id", "type": "int"},
            {"name": "name", "type": "string"}
        ]
    })";

    const avro::ValidSchema compiled = avro::compileJsonSchemaFromString(recordJson.c_str());

    ASSERT_EQUALS(compiled.root()->type(), avro::AVRO_RECORD);
}
|
||||
|
||||
// Compiles a record schema whose fields use array, map, enum and nullable
// union types, and checks that the root node is still reported as a record.
TEST(AvroTests, ComplexSchemaTypes) {
    const std::string recordJson = R"({
        "type": "record",
        "name": "ComplexRecord",
        "fields": [
            {"name": "id", "type": "long"},
            {"name": "tags", "type": {"type": "array", "items": "string"}},
            {"name": "metadata", "type": {"type": "map", "values": "string"}},
            {"name": "status", "type": {"type": "enum", "name": "Status", "symbols": ["ACTIVE", "INACTIVE", "PENDING"]}},
            {"name": "optional_field", "type": ["null", "string"], "default": null}
        ]
    })";

    const avro::ValidSchema compiled = avro::compileJsonSchemaFromString(recordJson.c_str());

    ASSERT_EQUALS(compiled.root()->type(), avro::AVRO_RECORD);
}
|
||||
|
||||
// Builds a generic record for a three-field schema, assigns every field by
// position and reads the values back.
TEST(AvroTests, GenericDatumOperations) {
    const std::string personJson = R"({
        "type": "record",
        "name": "Person",
        "fields": [
            {"name": "id", "type": "int"},
            {"name": "name", "type": "string"},
            {"name": "age", "type": "int"}
        ]
    })";

    const avro::ValidSchema compiled = avro::compileJsonSchemaFromString(personJson.c_str());

    // The datum built from the schema holds a GenericRecord for the root node.
    avro::GenericDatum personDatum(compiled);
    auto& person = personDatum.value<avro::GenericRecord>();

    // Populate the fields in schema order: id, name, age.
    person.setFieldAt(0, avro::GenericDatum(int32_t{12345}));
    person.setFieldAt(1, avro::GenericDatum(std::string("John Doe")));
    person.setFieldAt(2, avro::GenericDatum(int32_t{30}));

    // Each field must hand back exactly what was stored.
    ASSERT_EQUALS(person.fieldAt(0).value<int32_t>(), 12345);
    ASSERT_EQUALS(person.fieldAt(1).value<std::string>(), "John Doe");
    ASSERT_EQUALS(person.fieldAt(2).value<int32_t>(), 30);
}
|
||||
|
||||
// Round-trips a generic record through the binary encoder and decoder using
// in-memory streams, then checks that the decoded fields match the originals.
TEST(AvroTests, BinarySerializationDeserialization) {
    std::string schema_str = R"({
        "type": "record",
        "name": "SimpleRecord",
        "fields": [
            {"name": "id", "type": "int"},
            {"name": "message", "type": "string"}
        ]
    })";

    avro::ValidSchema schema = avro::compileJsonSchemaFromString(schema_str.c_str());

    // Create test data
    avro::GenericDatum datum(schema);
    avro::GenericRecord& record = datum.value<avro::GenericRecord>();
    record.setFieldAt(0, avro::GenericDatum(int32_t(42)));
    record.setFieldAt(1, avro::GenericDatum(std::string("Hello Avro!")));

    // Serialize to binary using GenericWriter
    auto out = avro::memoryOutputStream();
    avro::EncoderPtr encoder = avro::binaryEncoder();
    encoder->init(*out);
    avro::GenericWriter writer(schema, encoder);
    writer.write(datum);
    // Flush the encoder before the stream is read back, so that anything the
    // encoder still holds internally is handed over to the output stream.
    encoder->flush();

    // Deserialize from binary using GenericReader
    auto in = avro::memoryInputStream(*out);
    avro::DecoderPtr decoder = avro::binaryDecoder();
    decoder->init(*in);
    avro::GenericReader reader(schema, decoder);

    avro::GenericDatum deserialized_datum;
    reader.read(deserialized_datum);

    // Verify deserialized data
    avro::GenericRecord& deserialized_record = deserialized_datum.value<avro::GenericRecord>();
    ASSERT_EQUALS(deserialized_record.fieldAt(0).value<int32_t>(), 42);
    ASSERT_EQUALS(deserialized_record.fieldAt(1).value<std::string>(), "Hello Avro!");
}
|
||||
|
||||
// Stores an array-of-strings field and an int field in a generic record and
// reads both back. Note: despite the test name, the schema used here has no
// map field, so only the array type is exercised.
TEST(AvroTests, ArrayAndMapTypes) {
    const std::string collectionJson = R"({
        "type": "record",
        "name": "CollectionRecord",
        "fields": [
            {"name": "tags", "type": {"type": "array", "items": "string"}},
            {"name": "count", "type": "int"}
        ]
    })";

    const avro::ValidSchema compiled = avro::compileJsonSchemaFromString(collectionJson.c_str());

    avro::GenericDatum recordDatum(compiled);
    auto& rec = recordDatum.value<avro::GenericRecord>();

    // Build the array datum from the schema node of field 0 ("tags").
    avro::GenericDatum tagsDatum(compiled.root()->leafAt(0));
    auto& tagList = tagsDatum.value<avro::GenericArray>();
    tagList.value().push_back(avro::GenericDatum(std::string("tag1")));
    tagList.value().push_back(avro::GenericDatum(std::string("tag2")));
    tagList.value().push_back(avro::GenericDatum(std::string("tag3")));
    rec.setFieldAt(0, tagsDatum);

    // Plain int field ("count").
    rec.setFieldAt(1, avro::GenericDatum(int32_t{42}));

    // The stored array keeps its length and its first element.
    const avro::GenericArray& storedTags = rec.fieldAt(0).value<avro::GenericArray>();
    ASSERT_EQUALS(storedTags.value().size(), 3U);
    ASSERT_EQUALS(storedTags.value()[0].value<std::string>(), "tag1");

    // The int field is unaffected by the array field.
    ASSERT_EQUALS(rec.fieldAt(1).value<int32_t>(), 42);
}
|
||||
|
||||
// Stores a ["null", "string"] union with its string branch selected in a
// generic record, then checks the selected branch and the stored value.
TEST(AvroTests, UnionTypes) {
    const std::string unionJson = R"({
        "type": "record",
        "name": "UnionRecord",
        "fields": [
            {"name": "optional_string", "type": ["null", "string"]},
            {"name": "simple_field", "type": "int"}
        ]
    })";

    const avro::ValidSchema compiled = avro::compileJsonSchemaFromString(unionJson.c_str());

    avro::GenericDatum recordDatum(compiled);
    auto& rec = recordDatum.value<avro::GenericRecord>();

    // Build the union datum from the schema node of field 0, pick the string
    // branch (index 1) and assign through the GenericDatum interface.
    avro::GenericDatum optionalString(compiled.root()->leafAt(0));
    optionalString.selectBranch(1);
    optionalString.value<std::string>() = "Hello Union!";
    rec.setFieldAt(0, optionalString);

    // Plain int field.
    rec.setFieldAt(1, avro::GenericDatum(int32_t{42}));

    // The stored union reports the string branch and its value.
    const avro::GenericDatum& storedUnion = rec.fieldAt(0);
    ASSERT_EQUALS(storedUnion.unionBranch(), 1U);
    ASSERT_EQUALS(storedUnion.value<std::string>(), "Hello Union!");

    // The int field is unaffected by the union field.
    ASSERT_EQUALS(rec.fieldAt(1).value<int32_t>(), 42);
}
|
||||
|
||||
// Checks the 32-bit zigzag helpers that Avro uses for integer storage: the
// encoded values must match the zigzag mapping, and decoding must round-trip.
TEST(AvroTests, ZigzagEncoding) {
    int32_t positive = 123;
    int32_t negative = -123;

    uint32_t encoded_positive = avro::encodeZigzag32(positive);
    uint32_t encoded_negative = avro::encodeZigzag32(negative);

    // Pin the wire values as well as the round trip; a round-trip check alone
    // would also pass for a wrong but self-consistent encode/decode pair.
    // Zigzag maps n >= 0 to 2n and n < 0 to -2n - 1.
    ASSERT_EQUALS(encoded_positive, 246U);
    ASSERT_EQUALS(encoded_negative, 245U);

    int32_t decoded_positive = avro::decodeZigzag32(encoded_positive);
    int32_t decoded_negative = avro::decodeZigzag32(encoded_negative);

    ASSERT_EQUALS(decoded_positive, positive);
    ASSERT_EQUALS(decoded_negative, negative);
}
|
||||
|
||||
// Compiling a schema whose "type" is not a valid Avro type must raise
// avro::Exception.
TEST(AvroTests, ErrorHandling) {
    const std::string malformedJson = R"({
        "type": "invalid_type",
        "name": "BadSchema"
    })";

    ASSERT_THROWS(avro::compileJsonSchemaFromString(malformedJson.c_str()), avro::Exception);
}
|
||||
Loading…
Reference in New Issue