Revision 4f69c98ae95681cf972fa6701c94dbbb28e40d80 authored by sychen on 09 September 2022, 21:36:39 UTC, committed by Dongjoon Hyun on 09 September 2022, 21:36:39 UTC
### What changes were proposed in this pull request? Increase ORC test coverage. [ORC-1205](https://issues.apache.org/jira/browse/ORC-1205) Size of batches in some ConvertTreeReaders should be ensured before using ### Why are the changes needed? When spark reads an orc with type promotion, an `ArrayIndexOutOfBoundsException` may be thrown, which has been fixed in version 1.7.6 and 1.8.0. ```java java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.orc.impl.TreeReaderFactory$TreeReader.nextVector(TreeReaderFactory.java:387) at org.apache.orc.impl.TreeReaderFactory$LongTreeReader.nextVector(TreeReaderFactory.java:740) at org.apache.orc.impl.ConvertTreeReaderFactory$StringGroupFromAnyIntegerTreeReader.nextVector(ConvertTreeReaderFactory.java:1069) at org.apache.orc.impl.reader.tree.StructBatchReader.readBatchColumn(StructBatchReader.java:65) ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? add UT Closes #37808 from cxzl25/SPARK-39830-3.3. Authored-by: sychen <sychen@ctrip.com> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
1 parent aaa8292
appveyor.yml
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
version: "{build}-{branch}"
shallow_clone: true
platform: x64
configuration: Debug
branches:
only:
- master
only_commits:
files:
- appveyor.yml
- dev/appveyor-install-dependencies.ps1
- R/
- sql/core/src/main/scala/org/apache/spark/sql/api/r/
- core/src/main/scala/org/apache/spark/api/r/
- mllib/src/main/scala/org/apache/spark/ml/r/
- core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
- bin/*.cmd
cache:
- C:\Users\appveyor\.m2
install:
# Install SBT and dependencies
- ps: .\dev\appveyor-install-dependencies.ps1
# Required package for R unit tests. xml2 is required to use jUnit reporter in testthat.
- cmd: Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow', 'xml2'), repos='https://cloud.r-project.org/')"
- cmd: Rscript -e "pkg_list <- as.data.frame(installed.packages()[,c(1, 3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]"
build_script:
# '-Djna.nosys=true' is required to avoid kernel32.dll load failure.
# See SPARK-28759.
# Ideally we should check the tests related to Hive in SparkR as well (SPARK-31745).
- cmd: set SBT_MAVEN_PROFILES=-Psparkr
- cmd: set SBT_OPTS=-Djna.nosys=true -Dfile.encoding=UTF-8 -Xms4096m -Xms4096m -XX:ReservedCodeCacheSize=128m
- cmd: sbt package
- cmd: set SBT_MAVEN_PROFILES=
- cmd: set SBT_OPTS=
environment:
NOT_CRAN: true
# See SPARK-27848. Currently installing some dependent packages causes
# "(converted from warning) unable to identify current timezone 'C':" for an unknown reason.
# This environment variable works around to test SparkR against a higher version.
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
test_script:
- cmd: .\bin\spark-submit2.cmd --driver-java-options "-Dlog4j.configuration=file:///%CD:\=/%/R/log4j.properties" --conf spark.hadoop.fs.defaultFS="file:///" R\pkg\tests\run-all.R
notifications:
- provider: Email
on_build_success: false
on_build_failure: false
on_build_status_changed: false
![swh spinner](/static/img/swh-spinner.gif)
Computing file changes ...