Libp2p circuit, all dials failed, i/o timeout - v0.32.2

I am trying to connect two servers via a libp2p relay, one sends data, the other receives it.
It was working fine until I moved all of them to remote instances and tried configuring them over internal IPs.
The stream was established without a problem the first time, but for subsequent submissions all dials started failing; this keeps happening randomly throughout the whole program run.
I have tried configuring a simple relay node with a public address, and I have even gone as far as adding every known option and resource setting possible.
This is what my final relayer configuration looks like:

	// Relayer setup: build a resource manager whose System-scope limits are all
	// set to rcmgr.Unlimited, create the relayer host, then call relay.New on it.
	// NOTE(review): only the System scope is overridden here; presumably every
	// other scope falls back to scaledDefaultLimits (defined outside this
	// snippet) — confirm those defaults are not what is rejecting dials.
	cfg := rcmgr.PartialLimitConfig{
			System: rcmgr.ResourceLimits{
				Streams:         rcmgr.Unlimited,
				StreamsOutbound: rcmgr.Unlimited,
				StreamsInbound:  rcmgr.Unlimited,
				Conns:           rcmgr.Unlimited,
				ConnsInbound:    rcmgr.Unlimited,
				ConnsOutbound:   rcmgr.Unlimited,
				FD:              rcmgr.Unlimited,
				Memory:          rcmgr.LimitVal64(rcmgr.Unlimited),
			},
		}

	// Build the concrete limits from cfg and scaledDefaultLimits.
	limits := cfg.Build(scaledDefaultLimits)

	limiter := rcmgr.NewFixedLimiter(limits)

	// The resource manager is created with the WithMetricsDisabled option.
	rm, err := rcmgr.NewResourceManager(limiter, rcmgr.WithMetricsDisabled())
	if err != nil {
		// The underlying err value is not included in the log message.
		log.Debugln("Unable to build resource manager")
		return nil
	}

	// Create the relayer host listening on publicAddr, with the TLS and Noise
	// security options, the default transports, the resource manager built
	// above, and the relay / NAT / hole-punching options listed below.
	relayerHost, err := libp2p.New(
		libp2p.EnableRelay(),
		libp2p.ListenAddrs(publicAddr),
		libp2p.ConnectionManager(connManager),
		libp2p.Security(libp2ptls.ID, libp2ptls.New),
		libp2p.Security(noise.ID, noise.New),
		libp2p.DefaultTransports,
		libp2p.NATPortMap(),
		libp2p.EnableRelayService(),
		libp2p.EnableNATService(),
		libp2p.ResourceManager(rm),
		libp2p.EnableHolePunching())

	// A libp2p.New failure is only logged; execution continues to relay.New.
	// NOTE(review): relayerHost is presumably nil on error, so the call below
	// may panic — consider returning here, as the resource-manager check does.
	if err != nil {
		log.Debugln(err)
	}

	// Whatever relay.New returns is discarded, so a failure here goes unnoticed.
	// NOTE(review): EnableRelayService() is already passed to libp2p.New above;
	// confirm that also calling relay.New explicitly is intended, and check
	// whether relay.New applies default per-connection limits (no options are
	// passed) that could cut relayed streams short — verify against the relay
	// package documentation.
	relay.New(relayerHost)

And this is the server configuration for receiving data over the relay:

    // Collector (receiving server) setup: build a resource manager whose
    // System-scope limits are all set to rcmgr.Unlimited, then create the host.
    // NOTE(review): only the System scope is overridden here; presumably every
    // other scope falls back to scaledDefaultLimits (defined outside this
    // snippet) — confirm those defaults are not what is rejecting dials.
    cfg := rcmgr.PartialLimitConfig{
		System: rcmgr.ResourceLimits{
			Streams:         rcmgr.Unlimited,
			StreamsOutbound: rcmgr.Unlimited,
			StreamsInbound:  rcmgr.Unlimited,
			Conns:           rcmgr.Unlimited,
			ConnsInbound:    rcmgr.Unlimited,
			ConnsOutbound:   rcmgr.Unlimited,
			FD:              rcmgr.Unlimited,
			Memory:          rcmgr.LimitVal64(rcmgr.Unlimited),
		},
	}

	// Build the concrete limits from cfg and scaledDefaultLimits.
	limits := cfg.Build(scaledDefaultLimits)

	limiter := rcmgr.NewFixedLimiter(limits)

	// The resource manager is created with the WithMetricsDisabled option.
	rm, err := rcmgr.NewResourceManager(limiter, rcmgr.WithMetricsDisabled())
	if err != nil {
		// Only a fixed message is logged (err itself is not), and there is no
		// return: execution continues and rm is passed to libp2p.New below.
		// NOTE(review): rm is presumably nil on error — confirm this is intended.
		log.Debugln("Unable to build resource manager")
	}

	// Create the collector host listening on publicAddr, with the TLS and Noise
	// security options, the default transports, and the resource manager built
	// above. collectorHost is assigned with "=", so it is declared outside this
	// snippet. EnableRelayService is not among the options passed here.
	collectorHost, err = libp2p.New(
		libp2p.EnableRelay(),
		libp2p.ConnectionManager(connManager),
		libp2p.ListenAddrs(publicAddr),
		libp2p.Security(libp2ptls.ID, libp2ptls.New),
		libp2p.Security(noise.ID, noise.New),
		libp2p.DefaultTransports,
		libp2p.NATPortMap(),
		libp2p.EnableNATService(),
		libp2p.ResourceManager(rm),
		libp2p.EnableHolePunching(),
	)
	// A libp2p.New failure is only logged; nothing in this snippet stops later
	// code from using collectorHost.
	// NOTE(review): collectorHost is presumably nil on error — verify callers.
	if err != nil {
		log.Debugln(err)
	}

This is the last-resort config that was set up, and it basically includes a bunch of options that are not required. It would be great to know of any possible points of failure that cause recurring stream i/o timeouts and dial failures.

The receiving service listens on port 9100 on the remote instance; both services have been set up with public TCP addresses:

  • /ip4/{internal_IP}/tcp/9100

Port 9100 is accessible from all the instances, and both TCP and UDP are allowed.