Revision e82c2fd178df7d2f0532ec6f90463a24c4e8d272 authored by Piotr Tabor on 04 October 2021, 06:40:11 UTC, committed by GitHub on 04 October 2021, 06:40:11 UTC
[release-3.3] Dockerfile: bump debian bullseye-20210927
2 parents 984d71c + 24801f5
Raw File
case_sigquit_remove.go
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tester

import (
	"context"
	"fmt"
	"sort"
	"strings"
	"time"

	"github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/functional/rpcpb"

	"go.uber.org/zap"
)

// inject_SIGQUIT_ETCD_AND_REMOVE_DATA injects a "disastrous machine failure"
// into the member at index idx1 and then removes that member from the cluster.
//
// Steps, in order:
//  1. list the members through the target member and log them,
//  2. query the target member's status to learn its member ID,
//  3. send Operation_SIGQUIT_ETCD_AND_REMOVE_DATA to the target member,
//  4. after a short pause, ask the next member (idx1+1, wrapping around)
//     to remove the target member ID from the cluster,
//  5. after another short pause, list the members again and log them.
//
// Every client request is bounded by a timeout so that an unresponsive
// member makes the case fail instead of hanging the tester forever.
//
// It returns the first error encountered, or nil when every step succeeded.
func inject_SIGQUIT_ETCD_AND_REMOVE_DATA(clus *Cluster, idx1 int) error {
	// rpcTimeout bounds each individual client request issued below.
	const rpcTimeout = time.Minute

	cli1, err := clus.Members[idx1].CreateEtcdClient()
	if err != nil {
		return err
	}
	defer cli1.Close()

	var mresp *clientv3.MemberListResponse
	ctx, cancel := context.WithTimeout(context.Background(), rpcTimeout)
	mresp, err = cli1.MemberList(ctx)
	cancel()
	mss := []string{}
	if err == nil && mresp != nil {
		mss = describeMembers(mresp)
	}
	// Log before checking the error so a failed listing is still recorded.
	clus.lg.Info(
		"member list before disastrous machine failure",
		zap.String("request-to", clus.Members[idx1].EtcdClientEndpoint),
		zap.Strings("members", mss),
		zap.Error(err),
	)
	if err != nil {
		return err
	}

	// The member ID must be captured while the target is still running;
	// it is needed for the member removal after the failure.
	ctx, cancel = context.WithTimeout(context.Background(), rpcTimeout)
	sresp, serr := cli1.Status(ctx, clus.Members[idx1].EtcdClientEndpoint)
	cancel()
	if serr != nil {
		return serr
	}
	id1 := sresp.Header.MemberId
	is1 := fmt.Sprintf("%016x", id1)

	// No error field here: err is necessarily nil at this point.
	clus.lg.Info(
		"disastrous machine failure START",
		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
		zap.String("target-member-id", is1),
	)
	err = clus.sendOp(idx1, rpcpb.Operation_SIGQUIT_ETCD_AND_REMOVE_DATA)
	clus.lg.Info(
		"disastrous machine failure END",
		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
		zap.String("target-member-id", is1),
		zap.Error(err),
	)
	if err != nil {
		return err
	}

	time.Sleep(2 * time.Second)

	// Talk to the next member from here on; the target has been taken down.
	idx2 := (idx1 + 1) % len(clus.Members)
	var cli2 *clientv3.Client
	cli2, err = clus.Members[idx2].CreateEtcdClient()
	if err != nil {
		return err
	}
	defer cli2.Close()

	// FIXME(bug): this may block (until the timeout expires) during
	// "SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT"
	// is the new leader too busy with snapshotting?
	// is raft proposal dropped?
	// enable client keepalive for failover?
	clus.lg.Info(
		"member remove after disaster START",
		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
		zap.String("target-member-id", is1),
		zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
	)
	ctx, cancel = context.WithTimeout(context.Background(), rpcTimeout)
	_, err = cli2.MemberRemove(ctx, id1)
	cancel()
	clus.lg.Info(
		"member remove after disaster END",
		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
		zap.String("target-member-id", is1),
		zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
		zap.Error(err),
	)
	if err != nil {
		return err
	}

	time.Sleep(2 * time.Second)

	ctx, cancel = context.WithTimeout(context.Background(), rpcTimeout)
	mresp, err = cli2.MemberList(ctx)
	cancel()
	mss = []string{}
	if err == nil && mresp != nil {
		mss = describeMembers(mresp)
	}
	clus.lg.Info(
		"member list after member remove",
		zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
		zap.Strings("members", mss),
		zap.Error(err),
	)
	return err
}

// recover_SIGQUIT_ETCD_AND_REMOVE_DATA brings the member at index idx1 back
// into the cluster after it has been removed.
//
// Steps, in order:
//  1. ask the next member (idx1+1, wrapping around) to add the target
//     member back, using the target's advertised peer URLs,
//  2. after a short pause, set the target's initial cluster state to
//     "existing" and send it Operation_RESTART_ETCD,
//  3. after another short pause, list the members through the next member
//     and log them.
//
// Every client request is bounded by a timeout so that an unresponsive
// member makes the recovery fail instead of hanging the tester forever.
//
// It returns the first error encountered, or nil when every step succeeded.
func recover_SIGQUIT_ETCD_AND_REMOVE_DATA(clus *Cluster, idx1 int) error {
	// rpcTimeout bounds each individual client request issued below.
	const rpcTimeout = time.Minute

	idx2 := (idx1 + 1) % len(clus.Members)
	cli2, err := clus.Members[idx2].CreateEtcdClient()
	if err != nil {
		return err
	}
	defer cli2.Close()

	ctx, cancel := context.WithTimeout(context.Background(), rpcTimeout)
	_, err = cli2.MemberAdd(ctx, clus.Members[idx1].Etcd.AdvertisePeerURLs)
	cancel()
	// Log before checking the error so a failed add is still recorded.
	clus.lg.Info(
		"member add before fresh restart",
		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
		zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
		zap.Error(err),
	)
	if err != nil {
		return err
	}

	time.Sleep(2 * time.Second)

	// The target is restarted as a member of the already-running cluster.
	clus.Members[idx1].Etcd.InitialClusterState = "existing"
	err = clus.sendOp(idx1, rpcpb.Operation_RESTART_ETCD)
	clus.lg.Info(
		"fresh restart after member add",
		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
		zap.Error(err),
	)
	if err != nil {
		return err
	}

	time.Sleep(2 * time.Second)

	var mresp *clientv3.MemberListResponse
	ctx, cancel = context.WithTimeout(context.Background(), rpcTimeout)
	mresp, err = cli2.MemberList(ctx)
	cancel()
	mss := []string{}
	if err == nil && mresp != nil {
		mss = describeMembers(mresp)
	}
	clus.lg.Info(
		"member list after member add",
		zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
		zap.Strings("members", mss),
		zap.Error(err),
	)
	return err
}

// new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER builds the
// Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER failure case: a caseFollower driven by
// the SIGQUIT-and-remove-data inject/recover functions, wrapped in a caseDelay
// that uses the cluster's configured case delay duration.
func new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus *Cluster) Case {
	// The two trailing -1 values fill caseFollower's remaining positional
	// fields; presumably "not selected yet" sentinels — confirm against the
	// caseFollower definition.
	follower := &caseFollower{
		caseByFunc{
			rpcpbCase:     rpcpb.Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER,
			injectMember:  inject_SIGQUIT_ETCD_AND_REMOVE_DATA,
			recoverMember: recover_SIGQUIT_ETCD_AND_REMOVE_DATA,
		},
		-1,
		-1,
	}

	delayed := &caseDelay{
		Case:          follower,
		delayDuration: clus.GetCaseDelayDuration(),
	}
	return delayed
}

// new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT wraps the
// one-follower SIGQUIT-and-remove case in a caseUntilSnapshot tagged with
// Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT.
func new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster) Case {
	inner := new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus)
	wrapped := &caseUntilSnapshot{
		rpcpbCase: rpcpb.Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT,
		Case:      inner,
	}
	return wrapped
}

// new_Case_SIGQUIT_AND_REMOVE_LEADER builds the Case_SIGQUIT_AND_REMOVE_LEADER
// failure case: a caseLeader driven by the SIGQUIT-and-remove-data
// inject/recover functions, wrapped in a caseDelay that uses the cluster's
// configured case delay duration.
func new_Case_SIGQUIT_AND_REMOVE_LEADER(clus *Cluster) Case {
	// The two trailing -1 values fill caseLeader's remaining positional
	// fields; presumably "not selected yet" sentinels — confirm against the
	// caseLeader definition.
	leader := &caseLeader{
		caseByFunc{
			rpcpbCase:     rpcpb.Case_SIGQUIT_AND_REMOVE_LEADER,
			injectMember:  inject_SIGQUIT_ETCD_AND_REMOVE_DATA,
			recoverMember: recover_SIGQUIT_ETCD_AND_REMOVE_DATA,
		},
		-1,
		-1,
	}

	delayed := &caseDelay{
		Case:          leader,
		delayDuration: clus.GetCaseDelayDuration(),
	}
	return delayed
}

// new_Case_SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT wraps the leader
// SIGQUIT-and-remove case in a caseUntilSnapshot tagged with
// Case_SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT.
func new_Case_SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster) Case {
	inner := new_Case_SIGQUIT_AND_REMOVE_LEADER(clus)
	wrapped := &caseUntilSnapshot{
		rpcpbCase: rpcpb.Case_SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT,
		Case:      inner,
	}
	return wrapped
}

// describeMembers renders every member of a member-list response as a
// one-line summary (name, zero-padded hex ID, comma-joined client URLs and
// peer URLs) and returns the summaries sorted lexicographically.
// mresp must be non-nil.
func describeMembers(mresp *clientv3.MemberListResponse) (ss []string) {
	members := mresp.Members
	ss = make([]string, 0, len(members))
	for _, member := range members {
		clientURLs := strings.Join(member.ClientURLs, ",")
		peerURLs := strings.Join(member.PeerURLs, ",")
		summary := fmt.Sprintf(
			"Name %s / ID %016x / ClientURLs %s / PeerURLs %s",
			member.Name, member.ID, clientURLs, peerURLs,
		)
		ss = append(ss, summary)
	}
	// Sort so the output does not depend on the order in the response.
	sort.Strings(ss)
	return ss
}
back to top