Revision 59f0b3bb472e118ce905bf07c5567bcedeaca5e6 authored by Saurabh Yadav on 11 January 2022, 12:49:01 UTC, committed by GitHub on 11 January 2022, 12:49:01 UTC
Issue Summary: A file downloaded from cloud storage is not flushed after it is written. When a lot of files are downloaded, the written data for some of them is not yet reflected in the actual physical file. That file is later accessed by filename, which leads to a read error.

Issue Error logs:

```
2022-01-05 09:54:14,992 DEBG 'runserver' stderr output:
[Wed Jan 05 09:54:14.992125 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868] [2022-01-05 09:54:14,991] ERROR cvat.server.task_12: cannot get requested data type: chunk, number: 0, quality: Quality.COMPRESSED
[Wed Jan 05 09:54:14.992147 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868] Traceback (most recent call last):
[Wed Jan 05 09:54:14.992152 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]   File "/home/django/cvat/apps/engine/cache.py", line 96, in prepare_chunk_buff
[Wed Jan 05 09:54:14.992156 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]     if checksum and not md5_hash(source_path) == checksum:
[Wed Jan 05 09:54:14.992159 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]   File "/home/django/cvat/apps/engine/utils.py", line 100, in md5_hash
[Wed Jan 05 09:54:14.992163 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]     frame = Image.open(frame, 'r')
[Wed Jan 05 09:54:14.992166 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]   File "/opt/venv/lib/python3.8/site-packages/PIL/Image.py", line 3023, in open
[Wed Jan 05 09:54:14.992186 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]     raise UnidentifiedImageError(
[Wed Jan 05 09:54:14.992189 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868] PIL.UnidentifiedImageError: cannot identify image file '/tmp/cvat_041w7vsimages#0089c635-ae5f-49d7-baa5-a4d75f0412ca.png'
[Wed Jan 05 09:54:14.992192 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]
[Wed Jan 05 09:54:14.992196 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868] During handling of the above exception, another exception occurred:
[Wed Jan 05 09:54:14.992199 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]

2022-01-05 09:54:14,992 DEBG 'runserver' stderr output:
[Wed Jan 05 09:54:14.992202 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868] Traceback (most recent call last):
[Wed Jan 05 09:54:14.992205 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]   File "/home/django/cvat/apps/engine/views.py", line 689, in data
[Wed Jan 05 09:54:14.992208 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]     buff, mime_type = frame_provider.get_chunk(data_id, data_quality)
[Wed Jan 05 09:54:14.992211 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]   File "/home/django/cvat/apps/engine/frame_provider.py", line 167, in get_chunk
[Wed Jan 05 09:54:14.992215 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]     return self._loaders[quality].get_chunk_path(chunk_number, quality, self._db_data)
[Wed Jan 05 09:54:14.992218 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]   File "/home/django/cvat/apps/engine/cache.py", line 32, in get_buff_mime
[Wed Jan 05 09:54:14.992221 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]     chunk, tag = self.prepare_chunk_buff(db_data, quality, chunk_number)
[Wed Jan 05 09:54:14.992224 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]   File "/home/django/cvat/apps/engine/cache.py", line 113, in prepare_chunk_buff
[Wed Jan 05 09:54:14.992227 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868]     raise Exception(msg)
[Wed Jan 05 09:54:14.992231 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868] Exception: cannot identify image file '/tmp/cvat_041w7vsimages#0089c635-ae5f-49d7-baa5-a4d75f0412ca.png'
[Wed Jan 05 09:54:14.992414 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868] ERROR - 2022-01-05 09:54:14,991 - views - cannot get requested data type: chunk, number: 0, quality: Quality.COMPRESSED
[Wed Jan 05 09:54:14.992425 2022] [wsgi:error] [pid 330:tid 139683931096832] [remote 172.20.0.2:56868] Traceback (most recent call last):
```
1 parent cd1f89e
Raw File
wait-for-it.sh
#!/usr/bin/env bash
#   Use this script to test if a given TCP host/port are available
#   https://github.com/vishnubob/wait-for-it

# Name of this script without its directory part; used as the prefix of every
# status message. "$0" is quoted so a script path containing whitespace is not
# split into several arguments to basename.
cmdname=$(basename -- "$0")

# Echo all arguments to stderr, unless quiet mode is on (QUIET equal to 1).
echoerr()
{
    # Quiet mode: stay silent and report success.
    [[ $QUIET -ne 1 ]] || return 0
    echo "$@" 1>&2
}

# Print the command-line help (prefixed with $cmdname) to stderr and terminate
# the script with exit status 1.
usage() {
    cat 1>&2 << USAGE
Usage:
    $cmdname host:port [-s] [-t timeout] [-- command args]
    -h HOST | --host=HOST       Host or IP under test
    -p PORT | --port=PORT       TCP port under test
                                Alternatively, you specify the host and port as host:port
    -s | --strict               Only execute subcommand if the test succeeds
    -q | --quiet                Don't output any status messages
    -t TIMEOUT | --timeout=TIMEOUT
                                Timeout in seconds, zero for no timeout
    -- COMMAND ARGS             Execute command with args after the test finishes
USAGE
    exit 1
}

# Poll $HOST:$PORT once per second until a TCP connection succeeds.
#
# Globals read:
#   cmdname  - prefix for status messages
#   HOST     - host name or IP to probe
#   PORT     - TCP port to probe
#   TIMEOUT  - only used to choose the announcement message; this function
#              does not enforce the limit itself
#   ISBUSY   - when 1, probe with `nc -z`; otherwise use bash's /dev/tcp
#              redirection
# Outputs:   status messages on stderr via echoerr
# Returns:   the status of the last probe (the loop only ends on success)
wait_for()
{
    # Keep the bookkeeping variables out of the global namespace.
    local start_ts end_ts result

    if [[ $TIMEOUT -gt 0 ]]; then
        echoerr "$cmdname: waiting $TIMEOUT seconds for $HOST:$PORT"
    else
        echoerr "$cmdname: waiting for $HOST:$PORT without a timeout"
    fi
    start_ts=$(date +%s)
    while :
    do
        if [[ $ISBUSY -eq 1 ]]; then
            nc -z "$HOST" "$PORT"
            result=$?
        else
            # The subshell keeps the "connection refused" noise from bash
            # itself off the terminal; the path is quoted so an odd HOST
            # value cannot turn into an ambiguous redirect.
            (echo > "/dev/tcp/$HOST/$PORT") >/dev/null 2>&1
            result=$?
        fi
        if [[ $result -eq 0 ]]; then
            end_ts=$(date +%s)
            echoerr "$cmdname: $HOST:$PORT is available after $((end_ts - start_ts)) seconds"
            break
        fi
        sleep 1
    done
    return $result
}

# Run this script again in --child mode under `timeout`, so the wait is
# bounded by $TIMEOUT seconds.
#
# Globals read:    cmdname, HOST, PORT, TIMEOUT, QUIET, BUSYTIMEFLAG
# Globals written: PID (background job), RESULT (its exit status)
# Outputs:         a message on stderr (via echoerr) when the child exits
#                  with a non-zero status
# Returns:         the exit status of the `timeout` invocation
wait_for_wrapper()
{
    # Build the child command as an array so that every value (script path,
    # host, port) stays a single argument even if it contains whitespace.
    local -a child_cmd=(timeout)
    if [[ -n $BUSYTIMEFLAG ]]; then
        child_cmd+=("$BUSYTIMEFLAG")
    fi
    child_cmd+=("$TIMEOUT" "$0")
    if [[ $QUIET -eq 1 ]]; then
        child_cmd+=(--quiet)
    fi
    child_cmd+=(--child "--host=$HOST" "--port=$PORT" "--timeout=$TIMEOUT")

    # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
    "${child_cmd[@]}" &
    PID=$!
    trap "kill -INT -$PID" INT
    wait $PID
    RESULT=$?
    if [[ $RESULT -ne 0 ]]; then
        echoerr "$cmdname: timeout occurred after waiting $TIMEOUT seconds for $HOST:$PORT"
    fi
    return $RESULT
}

# process arguments
#
# Parse the command line into the globals used by the rest of the script:
#   HOST, PORT   - target, from "host:port", -h/--host= and -p/--port=
#   TIMEOUT      - from -t/--timeout=
#   QUIET        - 1 when -q/--quiet is given
#   STRICT       - 1 when -s/--strict is given
#   CHILD        - 1 when --child is given
#   CLI          - array holding the command that follows "--"
# Calls usage for --help and for unknown arguments. A short option whose
# value is missing (-h, -p, -t as last argument) stops parsing.
process_args()
{
    local hostport
    while [[ $# -gt 0 ]]
    do
        case "$1" in
            --child)
            CHILD=1
            shift 1
            ;;
            -q | --quiet)
            QUIET=1
            shift 1
            ;;
            -s | --strict)
            STRICT=1
            shift 1
            ;;
            -h)
            HOST="$2"
            if [[ $HOST == "" ]]; then break; fi
            shift 2
            ;;
            --host=*)
            HOST="${1#*=}"
            shift 1
            ;;
            -p)
            PORT="$2"
            if [[ $PORT == "" ]]; then break; fi
            shift 2
            ;;
            --port=*)
            PORT="${1#*=}"
            shift 1
            ;;
            -t)
            TIMEOUT="$2"
            if [[ $TIMEOUT == "" ]]; then break; fi
            shift 2
            ;;
            --timeout=*)
            TIMEOUT="${1#*=}"
            shift 1
            ;;
            --)
            shift
            CLI=("$@")
            break
            ;;
            --help)
            usage
            ;;
            *:* )
            # host:port shorthand. This arm comes after the option arms so
            # that an option value containing a colon (e.g. --host=fe80::1)
            # is not mistaken for it. `read -a` splits the fields without
            # the pathname expansion an unquoted array assignment performs.
            read -r -a hostport <<< "${1//:/ }"
            HOST=${hostport[0]}
            PORT=${hostport[1]}
            shift 1
            ;;
            *)
            echoerr "Unknown argument: $1"
            usage
            ;;
        esac
    done
}

process_args "$@"

# Both a host and a port are mandatory; without them print the error and the
# usage text (usage terminates the script).
if [[ -z "$HOST" || -z "$PORT" ]]; then
    echoerr "Error: you need to provide a host and port to test."
    usage
fi

# Defaults for every option that is still unset or empty at this point:
# 15 second timeout, non-strict, not a child invocation, not quiet.
: "${TIMEOUT:=15}"
: "${STRICT:=0}"
: "${CHILD:=0}"
: "${QUIET:=0}"

# Check whether `timeout` is provided by BusyBox.
#   ISBUSY       - 1 when the resolved path of `timeout` contains "busybox"
#   BUSYTIMEFLAG - "-t" when that BusyBox timeout advertises a -t option in
#                  its usage text, otherwise empty (newer BusyBox releases
#                  take the duration as a plain argument and reject -t)
# `type -p` reports only an executable found on PATH. The symlink is resolved
# with realpath, falling back to readlink -f and finally to the unresolved
# path when neither tool is available.
TIMEOUT_PATH=$(type -p timeout)
if [[ -n $TIMEOUT_PATH ]]; then
    TIMEOUT_PATH=$(realpath "$TIMEOUT_PATH" 2>/dev/null \
        || readlink -f "$TIMEOUT_PATH" 2>/dev/null \
        || printf '%s\n' "$TIMEOUT_PATH")
fi
BUSYTIMEFLAG=""
if [[ $TIMEOUT_PATH == *busybox* ]]; then
    ISBUSY=1
    # Running timeout without arguments prints its usage text; only pass -t
    # later if this build actually lists it.
    if timeout 2>&1 | grep -q -e '-t '; then
        BUSYTIMEFLAG="-t"
    fi
else
    ISBUSY=0
fi

# Child invocation (started by wait_for_wrapper): just poll and report the
# outcome through the exit status.
if [[ $CHILD -gt 0 ]]; then
    wait_for
    RESULT=$?
    exit $RESULT
fi

# Parent invocation: bound the wait with `timeout` when a positive TIMEOUT is
# set, otherwise poll directly without a limit.
if [[ $TIMEOUT -gt 0 ]]; then
    wait_for_wrapper
else
    wait_for
fi
RESULT=$?

# Without a command after "--" the wait result is the script's exit status.
if [[ $CLI == "" ]]; then
    exit $RESULT
fi

# In strict mode a failed wait prevents the command from running.
if [[ $RESULT -ne 0 && $STRICT -eq 1 ]]; then
    echoerr "$cmdname: strict mode, refusing to execute subprocess"
    exit $RESULT
fi
exec "${CLI[@]}"
back to top