Index: share/man/man4/stf.4 =================================================================== --- share/man/man4/stf.4 (revision 261797) +++ share/man/man4/stf.4 (working copy) @@ -1,6 +1,7 @@ .\" $KAME: stf.4,v 1.35 2001/05/02 06:24:49 itojun Exp $ .\" .\" Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. +.\" Copyright (c) 2010 Hiroki Sato .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without @@ -42,22 +43,12 @@ .Sh DESCRIPTION The .Nm -interface supports -.Dq 6to4 -IPv6 in IPv4 encapsulation. -It can tunnel IPv6 traffic over IPv4, as specified in -.Li RFC3056 . +interface supports IPv6 in IPv4 encapsulation by +tunneling IPv6 traffic over IPv4, as specified in +.Li RFC3056 Pq 6to4 +and +.Li RFC5569 Pq 6rd . .Pp -For ordinary nodes in 6to4 site, you do not need -.Nm -interface. -The -.Nm -interface is necessary for site border router -(called -.Dq 6to4 router -in the specification). -.Pp Each .Nm interface is created at runtime using interface cloning. @@ -72,12 +63,28 @@ .Pp Due to the way 6to4 protocol is specified, .Nm -interface requires certain configuration to work properly. +interface requires certain configuration to work properly. Two +different protocols defined in RFC3056 and RFC5569 are basically the +same as each other except for address handling, so +.Nm +decides its behavior based on the configured IPv6 addresses as +explained in the following. +The +.Nm +interface can be configured with multiple IPv6 addresses including +both 6to4 and 6rd. +.Sh RFC3056 (a.k.a. 6to4) Single -(no more than 1) -valid 6to4 address needs to be configured to the interface. -.Dq A valid 6to4 address -is an address which has the following properties. +(no more than 1) valid 6to4 address needs to be configured to the interface. +.Dq A valid 6to4 address +is an address which has the following properties. 
For ordinary nodes +in a 6to4 site, you do not need +.Nm +interface; it is necessary only for a site border router +(called +.Dq 6to4 router +in the specification). +.Pp If any of the following properties are not satisfied, .Nm raises runtime error on packet transmission. @@ -110,8 +117,80 @@ .Nm interface will check the IPv4 source address on packets, if the IPv6 prefix length is larger than 16. +.Sh RFC5569 (a.k.a. 6rd) +The +.Nm +interface works in the 6rd mode when it has one or more IPv6 addresses that +consist of an IPv6 prefix and a 32-bit IPv4 part with a prefix length +equal to or shorter than 64. In the 6rd protocol, an IPv6 address +.Li 2001:db8:c000:205::1/32 +means the following, for example: +.Bl -bullet +.It +The 6rd relay prefix is +.Li 2001:db8::/32 . +.It +The 6rd router's IPv4 address is +.Li 192.0.2.5 . +.El .Pp +As you can see, the IPv4 address is embedded in the IPv6 address just +after the prefix. While you can choose an IPv6 prefix length other +than 32, it must be from 0 to 32. +.Pp +Assuming this address is configured on the .Nm +interface, it does the following: +.Bl -bullet +.It +An incoming IPv6 packet on +.Nm +will be encapsulated in an IPv4 packet with the source address +.Li 192.0.2.5 +and then the IPv4 packet is delivered based on the IPv4 routing table. +The IPv4 destination address is calculated from the destination +address of the original IPv6 packet in the same way as the source. +.It +When an incoming IPv4 packet encapsulates an IPv6 packet whose +destination address matches a 6rd prefix with embedded IPv4 address +configured on the +.Nm +interface, the IPv6 packet will be decapsulated and delivered based on +the IPv6 routing table. Note that because the +.Nm +interface normally has a route which covers the whole range of a 6rd relay +prefix, the delivered IPv6 packet can return to +.Nm +if there is no more specific route. In that case, the returned packet +will be discarded silently. 
+.El +.\" XXX: example configuration will be added +.\" .Pp +.\" By using this interface, you can configure a 6rd domain. For simplicity, +.\" we assume the following here: +.\" .Bl -bullet +.\" .It +.\" A 6rd Customer, who has an IPv6/IPv4 LAN and an IPv4-only access +.\" toward network of his Internet Service Provider. The Customer has +.\" a router called +.\" .Dq CE Pq Customer Edge +.\" Router, which can communicate between his LAN and the ISP over IPv4 +.\" and encapsulate +.\" his networks. +.\" .It +.\" A 6rd Provider, who provides IPv6 Internet reachability by using 6rd +.\" protocol. The Provider offers access to a router called +.\" .Dq PE Pq Provider Edge +.\" Router, which can communicate with +.\" .El +.\" .Pp +.\" A 6rd customer +.\" needs to configure +.\" .Nm +.\" on his CE (Customer Edge) router. +.Sh Other Functionality of the Interface +.Pp +.Nm can be configured to be ECN friendly. This can be configured by .Dv IFF_LINK1 . @@ -147,9 +226,6 @@ Packets with limited broadcast address as outer IPv4 source/destination .Pq Li 255.0.0.0/8 .It -Packets with private address as outer IPv4 source/destination -.Pq Li 10.0.0.0/8 , 172.16.0.0/12 , 192.168.0.0/16 -.It Packets with subnet broadcast address as outer IPv4 source/destination. The check is made against subnet broadcast addresses for all of the directly connected subnets. @@ -164,6 +240,11 @@ inner IPv6 address, if the IPv6 address matches 6to4 prefix. .El .Pp +In addition to them, packets with private address as outer IPv4 +source/destination +.Pq Li 10.0.0.0/8 , 172.16.0.0/12 , 192.168.0.0/16 +are filtered out only in the 6to4 mode. +.Pp It is recommended to filter/audit incoming IPv4 packet with IP protocol number 41, as necessary. It is also recommended to filter/audit encapsulated IPv6 packets as well. 
Index: sys/net/if_stf.c =================================================================== --- sys/net/if_stf.c (revision 261797) +++ sys/net/if_stf.c (working copy) @@ -3,6 +3,7 @@ /*- * Copyright (C) 2000 WIDE Project. + * Copyright (c) 2010 Hiroki Sato * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +32,7 @@ */ /* - * 6to4 interface, based on RFC3056. + * 6to4 interface, based on RFC3056 + 6rd (RFC5569) support. * * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting. * There is no address mapping defined from IPv6 multicast address to IPv4 @@ -60,7 +61,7 @@ * ICMPv6: * - Redirects cannot be used due to the lack of link-local address. * - * stf interface does not have, and will not need, a link-local address. + * stf interface does not have, and will not need, a link-local address. * It seems to have no real benefit and does not help the above symptoms much. * Even if we assign link-locals to interface, we cannot really * use link-local unicast/multicast on top of 6to4 cloud (since there's no @@ -72,6 +73,12 @@ * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt * for details. The code tries to filter out some of malicious packets. * Note that there is no way to be 100% secure. + * + * 6rd (RFC5569) extension is enabled when an IPv6 GUA other than + * 2002::/16 is assigned. The stf(4) treats the 32 bits just after + * the prefix as the IPv4 address of the 6rd customer site. The + * prefixlen must not be longer than 32. + * */ #include "opt_inet.h" @@ -120,13 +127,40 @@ #include +#define STF_DEBUG 1 +#define ip_sprintf(buf, a) \ + sprintf(buf, "%d.%d.%d.%d", \ + (ntohl((a)->s_addr)>>24)&0xFF, \ + (ntohl((a)->s_addr)>>16)&0xFF, \ + (ntohl((a)->s_addr)>>8)&0xFF, \ + (ntohl((a)->s_addr))&0xFF); +#if STF_DEBUG +#define DEBUG_PRINTF(a, ...) \ + do { \ + if (V_stf_debug >= a) \ + printf(__VA_ARGS__); \ + } while (0) +#else +#define DEBUG_PRINTF(a, ...) 
+#endif + SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW, 0, "6to4 Interface"); -static int stf_route_cache = 1; -SYSCTL_INT(_net_link_stf, OID_AUTO, route_cache, CTLFLAG_RW, - &stf_route_cache, 0, "Caching of IPv4 routes for 6to4 Output"); +static VNET_DEFINE(int, stf_route_cache) = 1; +#define V_stf_route_cache VNET(stf_route_cache) +SYSCTL_VNET_INT(_net_link_stf, OID_AUTO, route_cache, CTLFLAG_RW, + &VNET_NAME(stf_route_cache), 0, + "Enable caching of IPv4 routes for 6to4 output."); +#if STF_DEBUG +static VNET_DEFINE(int, stf_debug) = 0; +#define V_stf_debug VNET(stf_debug) +SYSCTL_VNET_INT(_net_link_stf, OID_AUTO, stf_debug, CTLFLAG_RW, + &VNET_NAME(stf_debug), 0, + "Enable displaying verbose debug message of stf interfaces"); +#endif + static int stf_permit_rfc1918 = 0; TUNABLE_INT("net.link.stf.permit_rfc1918", &stf_permit_rfc1918); SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RW | CTLFLAG_TUN, @@ -133,7 +167,6 @@ &stf_permit_rfc1918, 0, "Permit the use of private IPv4 addresses"); #define STFNAME "stf" -#define STFUNIT 0 #define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002) @@ -150,17 +183,26 @@ struct route_in6 __sc_ro6; /* just for safety */ } __sc_ro46; #define sc_ro __sc_ro46.__sc_ro4 - struct mtx sc_ro_mtx; + struct mtx sc_mtx; u_int sc_fibnum; const struct encaptab *encap_cookie; + u_int sc_flags; + LIST_ENTRY(stf_softc) stf_list; }; #define STF2IFP(sc) ((sc)->sc_ifp) -/* - * Note that mutable fields in the softc are not currently locked. - * We do lock sc_ro in stf_output though. 
- */ +static struct mtx stf_mtx; static MALLOC_DEFINE(M_STF, STFNAME, "6to4 Tunnel Interface"); +static VNET_DEFINE(LIST_HEAD(, stf_softc), stf_softc_list); +#define V_stf_softc_list VNET(stf_softc_list) + +#define STF_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "stf softc", \ + NULL, MTX_DEF); +#define STF_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) +#define STF_LOCK(sc) mtx_lock(&(sc)->sc_mtx) +#define STF_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) +#define STF_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) + static const int ip_stf_ttl = 40; extern struct domain inetdomain; @@ -175,8 +217,6 @@ .pr_usrreqs = &rip_usrreqs }; -static char *stfnames[] = {"stf0", "stf", "6to4", NULL}; - static int stfmodevent(module_t, int, void *); static int stf_encapcheck(const struct mbuf *, int, int, void *); static struct in6_ifaddr *stf_getsrcifa6(struct ifnet *); @@ -190,67 +230,43 @@ static void stf_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static int stf_ioctl(struct ifnet *, u_long, caddr_t); -static int stf_clone_match(struct if_clone *, const char *); -static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t); -static int stf_clone_destroy(struct if_clone *, struct ifnet *); -struct if_clone stf_cloner = IFC_CLONE_INITIALIZER(STFNAME, NULL, 0, - NULL, stf_clone_match, stf_clone_create, stf_clone_destroy); +#define STF_GETIN4_USE_CACHE 1 +static struct sockaddr_in *stf_getin4addr(struct sockaddr_in *, + struct ifaddr *, + int); +static struct sockaddr_in *stf_getin4addr_in6(struct sockaddr_in *, + struct ifaddr *, + struct in6_addr *); +static struct sockaddr_in *stf_getin4addr_sin6(struct sockaddr_in *, + struct ifaddr *, + struct sockaddr_in6 *); +static int stf_clone_create(struct if_clone *, int, caddr_t); +static void stf_clone_destroy(struct ifnet *); -static int -stf_clone_match(struct if_clone *ifc, const char *name) -{ - int i; +IFC_SIMPLE_DECLARE(stf, 0); - for(i = 0; stfnames[i] != NULL; i++) { - if (strcmp(stfnames[i], name) == 0) - return 
(1); - } - - return (0); -} - static int -stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) +stf_clone_create(struct if_clone *ifc, int unit, caddr_t params) { - int err, unit; struct stf_softc *sc; struct ifnet *ifp; - /* - * We can only have one unit, but since unit allocation is - * already locked, we use it to keep from allocating extra - * interfaces. - */ - unit = STFUNIT; - err = ifc_alloc_unit(ifc, &unit); - if (err != 0) - return (err); - sc = malloc(sizeof(struct stf_softc), M_STF, M_WAITOK | M_ZERO); + sc->sc_fibnum = curthread->td_proc->p_fibnum; ifp = STF2IFP(sc) = if_alloc(IFT_STF); - if (ifp == NULL) { + if (sc->sc_ifp == NULL) { free(sc, M_STF); - ifc_free_unit(ifc, unit); - return (ENOSPC); + return (ENOMEM); } + STF_LOCK_INIT(sc); ifp->if_softc = sc; - sc->sc_fibnum = curthread->td_proc->p_fibnum; + if_initname(ifp, ifc->ifc_name, unit); - /* - * Set the name manually rather then using if_initname because - * we don't conform to the default naming convention for interfaces. 
- */ - strlcpy(ifp->if_xname, name, IFNAMSIZ); - ifp->if_dname = ifc->ifc_name; - ifp->if_dunit = IF_DUNIT_NONE; - - mtx_init(&(sc)->sc_ro_mtx, "stf ro", NULL, MTX_DEF); sc->encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV6, stf_encapcheck, &in_stf_protosw, sc); if (sc->encap_cookie == NULL) { if_printf(ifp, "attach failed\n"); free(sc, M_STF); - ifc_free_unit(ifc, unit); return (ENOMEM); } @@ -260,41 +276,57 @@ ifp->if_snd.ifq_maxlen = ifqmaxlen; if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); + + mtx_lock(&stf_mtx); + LIST_INSERT_HEAD(&V_stf_softc_list, sc, stf_list); + mtx_unlock(&stf_mtx); + return (0); } -static int -stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) +static void +stf_clone_destroy(struct ifnet *ifp) { struct stf_softc *sc = ifp->if_softc; int err; + mtx_lock(&stf_mtx); + LIST_REMOVE(sc, stf_list); + mtx_unlock(&stf_mtx); + err = encap_detach(sc->encap_cookie); KASSERT(err == 0, ("Unexpected error detaching encap_cookie")); - mtx_destroy(&(sc)->sc_ro_mtx); bpfdetach(ifp); if_detach(ifp); if_free(ifp); + STF_LOCK_DESTROY(sc); free(sc, M_STF); - ifc_free_unit(ifc, STFUNIT); - return (0); + return; } +static void +vnet_stf_init(const void *unused __unused) +{ + + LIST_INIT(&V_stf_softc_list); +} +VNET_SYSINIT(vnet_stf_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_stf_init, + NULL); + static int -stfmodevent(mod, type, data) - module_t mod; - int type; - void *data; +stfmodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: + mtx_init(&stf_mtx, "stf_mtx", NULL, MTX_DEF); if_clone_attach(&stf_cloner); break; case MOD_UNLOAD: if_clone_detach(&stf_cloner); + mtx_destroy(&stf_mtx); break; default: return (EOPNOTSUPP); @@ -310,28 +342,31 @@ }; DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +MODULE_VERSION(if_stf, 1); static int -stf_encapcheck(m, off, proto, arg) - const struct mbuf *m; - int off; - int proto; - void *arg; +stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { 
struct ip ip; struct in6_ifaddr *ia6; + struct sockaddr_in ia6_in4addr; + struct sockaddr_in ia6_in4mask; + struct sockaddr_in *sin; struct stf_softc *sc; - struct in_addr a, b, mask; + struct ifnet *ifp; + int ret = 0; + DEBUG_PRINTF(1, "%s: enter\n", __func__); sc = (struct stf_softc *)arg; if (sc == NULL) return 0; + ifp = STF2IFP(sc); - if ((STF2IFP(sc)->if_flags & IFF_UP) == 0) + if ((ifp->if_flags & IFF_UP) == 0) return 0; /* IFF_LINK0 means "no decapsulation" */ - if ((STF2IFP(sc)->if_flags & IFF_LINK0) != 0) + if ((ifp->if_flags & IFF_LINK0) != 0) return 0; if (proto != IPPROTO_IPV6) @@ -343,86 +378,162 @@ if (ip.ip_v != 4) return 0; - ia6 = stf_getsrcifa6(STF2IFP(sc)); + /* Lookup an ia6 whose IPv4 addr encoded in the IPv6 addr is valid. */ + ia6 = stf_getsrcifa6(ifp); if (ia6 == NULL) return 0; + sin = stf_getin4addr(&ia6_in4addr, &ia6->ia_ifa, STF_GETIN4_USE_CACHE); + if (sin == NULL) + return 0; +#if STF_DEBUG + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip6_sprintf(buf, &satosin6(ia6->ia_ifa.ifa_addr)->sin6_addr); + DEBUG_PRINTF(1, "%s: ia6->ia_ifa.ifa_addr = %s\n", __func__, buf); + ip6_sprintf(buf, &ia6->ia_addr.sin6_addr); + DEBUG_PRINTF(1, "%s: ia6->ia_addr = %s\n", __func__, buf); + ip6_sprintf(buf, &satosin6(ia6->ia_ifa.ifa_netmask)->sin6_addr); + DEBUG_PRINTF(1, "%s: ia6->ia_ifa.ifa_netmask = %s\n", __func__, buf); + ip6_sprintf(buf, &ia6->ia_prefixmask.sin6_addr); + DEBUG_PRINTF(1, "%s: ia6->ia_prefixmask = %s\n", __func__, buf); + + ip_sprintf(buf, &ia6_in4addr.sin_addr); + DEBUG_PRINTF(1, "%s: ia6_in4addr.sin_addr = %s\n", __func__, buf); + ip_sprintf(buf, &ip.ip_src); + DEBUG_PRINTF(1, "%s: ip.ip_src = %s\n", __func__, buf); + ip_sprintf(buf, &ip.ip_dst); + DEBUG_PRINTF(1, "%s: ip.ip_dst = %s\n", __func__, buf); + } +#endif /* * check if IPv4 dst matches the IPv4 address derived from the * local 6to4 address. * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:... 
*/ - if (bcmp(GET_V4(&ia6->ia_addr.sin6_addr), &ip.ip_dst, - sizeof(ip.ip_dst)) != 0) { - ifa_free(&ia6->ia_ifa); - return 0; + DEBUG_PRINTF(1, "%s: check1: ia6_in4addr.sin_addr == ip.ip_dst?\n", __func__); + if (ia6_in4addr.sin_addr.s_addr != ip.ip_dst.s_addr) { + DEBUG_PRINTF(1, "%s: check1: false. Ignore this packet.\n", __func__); + goto freeit; } - /* - * check if IPv4 src matches the IPv4 address derived from the - * local 6to4 address masked by prefixmask. - * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24 - * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24 - */ - bzero(&a, sizeof(a)); - bcopy(GET_V4(&ia6->ia_addr.sin6_addr), &a, sizeof(a)); - bcopy(GET_V4(&ia6->ia_prefixmask.sin6_addr), &mask, sizeof(mask)); + DEBUG_PRINTF(1, "%s: check2: ia6->ia_addr is 2002::/16?\n", __func__); + if (IN6_IS_ADDR_6TO4(&ia6->ia_addr.sin6_addr)) { + /* 6to4 (RFC 3056) */ + /* + * check if IPv4 src matches the IPv4 address derived + * from the local 6to4 address masked by prefixmask. + * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24 + * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24 + */ + DEBUG_PRINTF(1, "%s: check2: true.\n", __func__); + + memcpy(&ia6_in4mask.sin_addr, + GET_V4(&ia6->ia_prefixmask.sin6_addr), + sizeof(ia6_in4mask.sin_addr)); +#if STF_DEBUG + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip_sprintf(buf, &ia6_in4addr.sin_addr); + DEBUG_PRINTF(1, "%s: ia6->ia_addr = %s\n", + __func__, buf); + ip_sprintf(buf, &ip.ip_src); + DEBUG_PRINTF(1, "%s: ip.ip_src = %s\n", + __func__, buf); + ip_sprintf(buf, &ia6_in4mask.sin_addr); + DEBUG_PRINTF(1, "%s: ia6->ia_prefixmask = %s\n", + __func__, buf); + + DEBUG_PRINTF(1, "%s: check3: ia6_in4addr.sin_addr & mask == ip.ip_src & mask\n", + __func__); + } +#endif + + if ((ia6_in4addr.sin_addr.s_addr & ia6_in4mask.sin_addr.s_addr) != + (ip.ip_src.s_addr & ia6_in4mask.sin_addr.s_addr)) { + DEBUG_PRINTF(1, "%s: check3: false. 
Ignore this packet.\n", + __func__); + goto freeit; + } + } else { + /* 6rd (RFC 5569) */ + DEBUG_PRINTF(1, "%s: check2: false. 6rd.\n", __func__); + /* + * No restriction on the src address in the case of + * 6rd because the stf(4) interface always has a + * prefix which covers whole of IPv4 src address + * range. So, stf_output() will catch all of + * 6rd-capsuled IPv4 traffic with suspicious inner dst + * IPv4 address (i.e. the IPv6 destination address is + * one the admin does not like to route to outside), + * and then it discard them silently. + */ + } + DEBUG_PRINTF(1, "%s: all clear!\n", __func__); + /* stf interface makes single side match only */ + ret = 32; +freeit: ifa_free(&ia6->ia_ifa); - a.s_addr &= mask.s_addr; - b = ip.ip_src; - b.s_addr &= mask.s_addr; - if (a.s_addr != b.s_addr) - return 0; - /* stf interface makes single side match only */ - return 32; + return (ret); } static struct in6_ifaddr * -stf_getsrcifa6(ifp) - struct ifnet *ifp; +stf_getsrcifa6(struct ifnet *ifp) { - struct ifaddr *ia; + struct ifaddr *ifa; struct in_ifaddr *ia4; - struct sockaddr_in6 *sin6; - struct in_addr in; + struct sockaddr_in *sin; + struct sockaddr_in in4; if_addr_rlock(ifp); - TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { - if (ia->ifa_addr->sa_family != AF_INET6) + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) continue; - sin6 = (struct sockaddr_in6 *)ia->ifa_addr; - if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) + if ((sin = stf_getin4addr(&in4, ifa, + STF_GETIN4_USE_CACHE)) == NULL) continue; - - bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in)); - LIST_FOREACH(ia4, INADDR_HASH(in.s_addr), ia_hash) - if (ia4->ia_addr.sin_addr.s_addr == in.s_addr) + LIST_FOREACH(ia4, INADDR_HASH(sin->sin_addr.s_addr), ia_hash) + if (ia4->ia_addr.sin_addr.s_addr == sin->sin_addr.s_addr) break; if (ia4 == NULL) continue; - ifa_ref(ia); +#if STF_DEBUG + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + 
ip6_sprintf(buf, &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr); + DEBUG_PRINTF(1, "%s: ifa->ifa_addr->sin6_addr = %s\n", + __func__, buf); + ip_sprintf(buf, &ia4->ia_addr.sin_addr); + DEBUG_PRINTF(1, "%s: ia4->ia_addr.sin_addr = %s\n", + __func__, buf); + } +#endif + ifa_ref(ifa); if_addr_runlock(ifp); - return (struct in6_ifaddr *)ia; + return (ifatoia6(ifa)); } if_addr_runlock(ifp); + return NULL; } static int -stf_output(ifp, m, dst, ro) - struct ifnet *ifp; - struct mbuf *m; - struct sockaddr *dst; - struct route *ro; +stf_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct route *ro) { struct stf_softc *sc; struct sockaddr_in6 *dst6; struct route *cached_route; - struct in_addr in4; - caddr_t ptr; + struct sockaddr_in *sin; + struct sockaddr_in in4; struct sockaddr_in *dst4; u_int8_t tos; struct ip *ip; @@ -484,20 +595,28 @@ /* * Pickup the right outer dst addr from the list of candidates. * ip6_dst has priority as it may be able to give us shorter IPv4 hops. + * ip6_dst: destination addr in the packet header. + * dst6: destination addr specified in function argument. 
*/ - ptr = NULL; - if (IN6_IS_ADDR_6TO4(&ip6->ip6_dst)) - ptr = GET_V4(&ip6->ip6_dst); - else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr)) - ptr = GET_V4(&dst6->sin6_addr); - else { + DEBUG_PRINTF(1, "%s: dst addr selection\n", __func__); + sin = stf_getin4addr_in6(&in4, &ia6->ia_ifa, &ip6->ip6_dst); + if (sin == NULL) + sin = stf_getin4addr_in6(&in4, &ia6->ia_ifa, &dst6->sin6_addr); + if (sin == NULL) { ifa_free(&ia6->ia_ifa); m_freem(m); ifp->if_oerrors++; return ENETUNREACH; } - bcopy(ptr, &in4, sizeof(in4)); +#if STF_DEBUG + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + ip_sprintf(buf, &sin->sin_addr); + DEBUG_PRINTF(1, "%s: ip_dst = %s\n", __func__, buf); + } +#endif if (bpf_peers_present(ifp->if_bpf)) { /* * We need to prepend the address family as @@ -521,11 +640,26 @@ ip = mtod(m, struct ip *); bzero(ip, sizeof(*ip)); + bcopy(&in4.sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)); - bcopy(GET_V4(&((struct sockaddr_in6 *)&ia6->ia_addr)->sin6_addr), - &ip->ip_src, sizeof(ip->ip_src)); + sin = stf_getin4addr_sin6(&in4, &ia6->ia_ifa, &ia6->ia_addr); + if (sin == NULL) { + ifa_free(&ia6->ia_ifa); + m_freem(m); + ifp->if_oerrors++; + return ENETUNREACH; + } + bcopy(&in4.sin_addr, &ip->ip_src, sizeof(ip->ip_src)); +#if STF_DEBUG + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip_sprintf(buf, &ip->ip_src); + DEBUG_PRINTF(1, "%s: ip_src = %s\n", __func__, buf); + } +#endif ifa_free(&ia6->ia_ifa); - bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst)); ip->ip_p = IPPROTO_IPV6; ip->ip_ttl = ip_stf_ttl; ip->ip_len = m->m_pkthdr.len; /*host order*/ @@ -534,7 +668,7 @@ else ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos); - if (!stf_route_cache) { + if (!V_stf_route_cache) { cached_route = NULL; goto sendit; } @@ -542,7 +676,7 @@ /* * Do we have a cached route? 
*/ - mtx_lock(&(sc)->sc_ro_mtx); + STF_LOCK(sc); dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst; if (dst4->sin_family != AF_INET || bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) { @@ -560,10 +694,17 @@ rtalloc_fib(&sc->sc_ro, sc->sc_fibnum); if (sc->sc_ro.ro_rt == NULL) { m_freem(m); - mtx_unlock(&(sc)->sc_ro_mtx); + STF_UNLOCK(sc); ifp->if_oerrors++; return ENETUNREACH; } + if (sc->sc_ro.ro_rt->rt_ifp == ifp) { + /* infinite loop detection */ + m_freem(m); + ifp->if_oerrors++; + STF_UNLOCK(sc); + return ENETUNREACH; + } } cached_route = &sc->sc_ro; @@ -570,16 +711,16 @@ sendit: M_SETFIB(m, sc->sc_fibnum); ifp->if_opackets++; + DEBUG_PRINTF(1, "%s: ip_output dispatch.\n", __func__); error = ip_output(m, NULL, cached_route, 0, NULL, NULL); if (cached_route != NULL) - mtx_unlock(&(sc)->sc_ro_mtx); + STF_UNLOCK(sc); return error; } static int -isrfc1918addr(in) - struct in_addr *in; +isrfc1918addr(struct in_addr *in) { /* * returns 1 if private address range: @@ -586,9 +727,9 @@ * 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 */ if (stf_permit_rfc1918 == 0 && ( - (ntohl(in->s_addr) & 0xff000000) >> 24 == 10 || - (ntohl(in->s_addr) & 0xfff00000) >> 16 == 172 * 256 + 16 || - (ntohl(in->s_addr) & 0xffff0000) >> 16 == 192 * 256 + 168)) + (ntohl(in->s_addr) & 0xff000000) == 10U << 24 || + (ntohl(in->s_addr) & 0xfff00000) == (172U * 256 + 16) << 16 || + (ntohl(in->s_addr) & 0xffff0000) == (192U * 256 + 168) << 16 )) return 1; return 0; @@ -595,10 +736,7 @@ } static int -stf_checkaddr4(sc, in, inifp) - struct stf_softc *sc; - struct in_addr *in; - struct ifnet *inifp; /* incoming interface */ +stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp) { struct in_ifaddr *ia4; @@ -614,20 +752,10 @@ } /* - * reject packets with private address range. 
- * (requirement from RFC3056 section 2 1st paragraph) - */ - if (isrfc1918addr(in)) - return -1; - - /* * reject packets with broadcast */ IN_IFADDR_RLOCK(); - for (ia4 = TAILQ_FIRST(&V_in_ifaddrhead); - ia4; - ia4 = TAILQ_NEXT(ia4, ia_link)) - { + TAILQ_FOREACH(ia4, &V_in_ifaddrhead, ia_link) { if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0) continue; if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) { @@ -646,7 +774,7 @@ bzero(&sin, sizeof(sin)); sin.sin_family = AF_INET; - sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_len = sizeof(sin); sin.sin_addr = *in; rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0UL, sc->sc_fibnum); @@ -667,10 +795,7 @@ } static int -stf_checkaddr6(sc, in6, inifp) - struct stf_softc *sc; - struct in6_addr *in6; - struct ifnet *inifp; /* incoming interface */ +stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp) { /* * check 6to4 addresses @@ -694,9 +819,7 @@ } void -in_stf_input(m, off) - struct mbuf *m; - int off; +in_stf_input(struct mbuf *m, int off) { int proto; struct stf_softc *sc; @@ -704,6 +827,7 @@ struct ip6_hdr *ip6; u_int8_t otos, itos; struct ifnet *ifp; + struct route_in6 rin6; proto = mtod(m, struct ip *)->ip_p; @@ -727,6 +851,17 @@ mac_ifnet_create_mbuf(ifp, m); #endif +#if STF_DEBUG + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip_sprintf(buf, &ip->ip_dst); + DEBUG_PRINTF(1, "%s: ip->ip_dst = %s\n", __func__, buf); + ip_sprintf(buf, &ip->ip_src); + DEBUG_PRINTF(1, "%s: ip->ip_src = %s\n", __func__, buf); + } +#endif /* * perform sanity check against outer src/dst. * for source, perform ingress filter as well. 
@@ -747,6 +882,17 @@ } ip6 = mtod(m, struct ip6_hdr *); +#if STF_DEBUG + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip6_sprintf(buf, &ip6->ip6_dst); + DEBUG_PRINTF(1, "%s: ip6->ip6_dst = %s\n", __func__, buf); + ip6_sprintf(buf, &ip6->ip6_src); + DEBUG_PRINTF(1, "%s: ip6->ip6_src = %s\n", __func__, buf); + } +#endif /* * perform sanity check against inner src/dst. * for source, perform ingress filter as well. @@ -757,6 +903,41 @@ return; } + /* + * reject packets with private address range. + * (requirement from RFC3056 section 2 1st paragraph) + */ + if ((IN6_IS_ADDR_6TO4(&ip6->ip6_src) && isrfc1918addr(&ip->ip_src)) || + (IN6_IS_ADDR_6TO4(&ip6->ip6_dst) && isrfc1918addr(&ip->ip_dst))) { + m_freem(m); + return; + } + + /* + * Ignore if the destination is the same stf interface because + * all valid outgoing IPv6 traffic should go to interfaces + * other than this one. + */ + memset(&rin6, 0, sizeof(rin6)); + rin6.ro_dst.sin6_len = sizeof(rin6.ro_dst); + rin6.ro_dst.sin6_family = AF_INET6; + memcpy(&rin6.ro_dst.sin6_addr, &ip6->ip6_dst, + sizeof(rin6.ro_dst.sin6_addr)); + rtalloc((struct route *)&rin6); + if (rin6.ro_rt == NULL) { + DEBUG_PRINTF(1, "%s: no IPv6 dst. Ignored.\n", __func__); + m_freem(m); + return; + } + if ((rin6.ro_rt->rt_ifp == ifp) && + (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &rin6.ro_dst.sin6_addr))) { + DEBUG_PRINTF(1, "%s: IPv6 dst is the same stf. 
Ignored.\n", __func__); + RTFREE(rin6.ro_rt); + m_freem(m); + return; + } + RTFREE(rin6.ro_rt); + itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; if ((ifp->if_flags & IFF_LINK1) != 0) ip_ecn_egress(ECN_ALLOWED, &otos, &itos); @@ -766,7 +947,7 @@ ip6->ip6_flow |= htonl((u_int32_t)itos << 20); m->m_pkthdr.rcvif = ifp; - + if (bpf_peers_present(ifp->if_bpf)) { /* * We need to prepend the address family as @@ -779,6 +960,7 @@ bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); } + DEBUG_PRINTF(1, "%s: netisr_dispatch(NETISR_IPV6)\n", __func__); /* * Put the packet to the network layer input queue according to the * specified address family. @@ -793,27 +975,252 @@ /* ARGSUSED */ static void -stf_rtrequest(cmd, rt, info) - int cmd; - struct rtentry *rt; - struct rt_addrinfo *info; +stf_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) { RT_LOCK_ASSERT(rt); rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; } +/* Extract the IPv4 address embedded in in6, using the prefix mask of ifa. */ + +static struct sockaddr_in * +stf_getin4addr_in6(struct sockaddr_in *sin, + struct ifaddr *ifa, + struct in6_addr *in6) +{ + struct sockaddr_in6 sin6; + + DEBUG_PRINTF(1, "%s: enter.\n", __func__); + if (ifa == NULL || in6 == NULL) + return NULL; + + memset(&sin6, 0, sizeof(sin6)); + memcpy(&sin6.sin6_addr, in6, sizeof(sin6.sin6_addr)); + sin6.sin6_len = sizeof(sin6); + sin6.sin6_family = AF_INET6; + + return(stf_getin4addr_sin6(sin, ifa, &sin6)); +} + +static struct sockaddr_in * +stf_getin4addr_sin6(struct sockaddr_in *sin, + struct ifaddr *ifa, + struct sockaddr_in6 *sin6) +{ + struct in6_ifaddr ia6; + int i; + + DEBUG_PRINTF(1, "%s: enter.\n", __func__); + if (ifa == NULL || sin6 == NULL) + return NULL; + + memset(&ia6, 0, sizeof(ia6)); + memcpy(&ia6, ifatoia6(ifa), sizeof(ia6)); + + /* + * Use prefixmask information from ifa, and + * address information from sin6. 
+ */ + ia6.ia_addr.sin6_family = AF_INET6; + ia6.ia_ifa.ifa_addr = (struct sockaddr *)&ia6.ia_addr; + ia6.ia_ifa.ifa_dstaddr = NULL; + ia6.ia_ifa.ifa_netmask = (struct sockaddr *)&ia6.ia_prefixmask; + +#if STF_DEBUG + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip6_sprintf(buf, &sin6->sin6_addr); + DEBUG_PRINTF(1, "%s: sin6->sin6_addr = %s\n", __func__, buf); + ip6_sprintf(buf, &ia6.ia_addr.sin6_addr); + DEBUG_PRINTF(1, "%s: ia6.ia_addr.sin6_addr = %s\n", __func__, buf); + ip6_sprintf(buf, &ia6.ia_prefixmask.sin6_addr); + DEBUG_PRINTF(1, "%s: ia6.ia_prefixmask.sin6_addr = %s\n", __func__, buf); + } +#endif + + /* + * When (src addr & src mask) != (dst (sin6) addr & src mask), + * the dst is not in the 6rd domain. The IPv4 address must + * not be used. + */ + for (i = 0; i < sizeof(ia6.ia_addr.sin6_addr); i++) { + if ((((u_char *)&ia6.ia_addr.sin6_addr)[i] & + ((u_char *)&ia6.ia_prefixmask.sin6_addr)[i]) + != + (((u_char *)&sin6->sin6_addr)[i] & + ((u_char *)&ia6.ia_prefixmask.sin6_addr)[i])) + return NULL; + } + + /* After the mask check, overwrite ia6.ia_addr with sin6. */ + memcpy(&ia6.ia_addr, sin6, sizeof(ia6.ia_addr)); + return(stf_getin4addr(sin, (struct ifaddr *)&ia6, 0)); +} + +static struct sockaddr_in * +stf_getin4addr(struct sockaddr_in *sin, + struct ifaddr *ifa, + int flags) +{ + struct in_addr *in; + struct sockaddr_in6 *sin6; + struct in6_ifaddr *ia6; + + DEBUG_PRINTF(1, "%s: enter.\n", __func__); + if (ifa == NULL || + ifa->ifa_addr == NULL || + ifa->ifa_addr->sa_family != AF_INET6) + return NULL; + + sin6 = satosin6(ifa->ifa_addr); + ia6 = ifatoia6(ifa); + + if ((flags & STF_GETIN4_USE_CACHE) && + (ifa->ifa_dstaddr != NULL) && + (ifa->ifa_dstaddr->sa_family == AF_INET)) { + /* + * XXX: ifa_dstaddr is used as a cache of the + * extracted IPv4 address. 
+ */ + memcpy(sin, satosin(ifa->ifa_dstaddr), sizeof(*sin)); +#if STF_DEBUG + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip_sprintf(buf, &sin->sin_addr); + DEBUG_PRINTF(1, "%s: cached address was used = %s\n", __func__, buf); + } +#endif + return (sin); + } + memset(sin, 0, sizeof(*sin)); + in = &sin->sin_addr; + +#if STF_DEBUG + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip6_sprintf(buf, &sin6->sin6_addr); + DEBUG_PRINTF(1, "%s: sin6->sin6_addr = %s\n", __func__, buf); + } +#endif + + if (IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) { + /* 6to4 (RFC 3056) */ + bcopy(GET_V4(&sin6->sin6_addr), in, sizeof(*in)); + if (isrfc1918addr(in)) + return NULL; + } else { + /* 6rd (RFC 5569) */ + struct in6_addr buf; + u_char *p = (u_char *)&buf; + u_char *q = (u_char *)in; + u_int residue = 0; + u_char mask; + int i; + u_int plen; + + /* + * 6rd-relays IPv6 prefix is located at a 32-bit just + * after the prefix edge. + */ + plen = in6_mask2len(&satosin6(ifa->ifa_netmask)->sin6_addr, NULL); + if (32 < plen) + return NULL; + + memcpy(&buf, &sin6->sin6_addr, sizeof(buf)); + p += plen / 8; + residue = plen % 8; + mask = ~((u_char)(-1) >> residue); + + /* + * The p points head of the IPv4 address part in + * bytes. The residue is a bit-shift factor when + * prefixlen is not a multiple of 8. 
+ */ + for (i = 0; i < 4; i++) { + DEBUG_PRINTF(2, "p[%d] = %d\n", i, p[i]); + DEBUG_PRINTF(2, "residue = %d\n", residue); + if (residue) { + p[i] <<= residue; + DEBUG_PRINTF(2, "p[%d] << residue = %d\n", + i, p[i]); + DEBUG_PRINTF(2, "mask = %x\n", + mask); + DEBUG_PRINTF(2, "p[%d + 1] & mask = %d\n", + i, p[i + 1] & mask); + DEBUG_PRINTF(2, "p[%d + 1] & mask >> (8 - residue) = %d\n", + i, (p[i + 1] & mask) >> (8-residue)); + p[i] |= ((p[i+1] & mask) >> (8 - residue)); + } + q[i] = p[i]; + } + } +#if STF_DEBUG + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip_sprintf(buf, in); + DEBUG_PRINTF(1, "%s: in->in_addr = %s\n", __func__, buf); + DEBUG_PRINTF(1, "%s: leave\n", __func__); + } +#endif + if (flags & STF_GETIN4_USE_CACHE) { + DEBUG_PRINTF(1, "%s: try to access ifa->ifa_dstaddr.\n", __func__); + ifa->ifa_dstaddr = (struct sockaddr *)&ia6->ia_dstaddr; + DEBUG_PRINTF(1, "%s: try to memset 0 to ia_dstaddr.\n", __func__); + memset(&ia6->ia_dstaddr, 0, sizeof(ia6->ia_dstaddr)); + DEBUG_PRINTF(1, "%s: try to memcpy ifa->ifa_dstaddr.\n", __func__); + memcpy((struct sockaddr_in *)ifa->ifa_dstaddr, + sin, sizeof(struct sockaddr_in)); + DEBUG_PRINTF(1, "%s: try to set sa_family.\n", __func__); + ifa->ifa_dstaddr->sa_family = AF_INET; + DEBUG_PRINTF(1, "%s: in->in_addr is stored in ifa_dstaddr.\n", + __func__); + } + return (sin); +} + + static int -stf_ioctl(ifp, cmd, data) - struct ifnet *ifp; - u_long cmd; - caddr_t data; +stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifaddr *ifa; struct ifreq *ifr; - struct sockaddr_in6 *sin6; - struct in_addr addr; + struct sockaddr_in in4; + //struct sockaddr_in6 *sin6; + //struct in_addr addr; int error, mtu; + /* + * Sanity check: if more than two interfaces have IFF_UP, do + * if_down() for all of them except for the specified one. 
+ */ + if (ifp->if_flags & IFF_UP) { + struct stf_softc *sc_cur = ifp->if_softc; + struct stf_softc *sc; + + mtx_lock(&stf_mtx); + LIST_FOREACH(sc, &V_stf_softc_list, stf_list) { + if (sc == sc_cur) + continue; + if ((STF2IFP(sc)->if_flags & IFF_UP) != 0) { + if_printf(STF2IFP(sc), + "marked as DOWN because at least " + "one instance of stf(4) is already " + "working.\n"); + if_down(STF2IFP(sc)); + } + } + mtx_unlock(&stf_mtx); + } + + error = 0; switch (cmd) { case SIOCSIFADDR: @@ -822,17 +1229,16 @@ error = EAFNOSUPPORT; break; } - sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; - if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) { + if (stf_getin4addr(&in4, ifa, 0) == NULL) { error = EINVAL; break; } - bcopy(GET_V4(&sin6->sin6_addr), &addr, sizeof(addr)); - if (isrfc1918addr(&addr)) { - error = EINVAL; - break; - } - + /* + * XXX: ifa_dstaddr is used as a cache of the + * extracted IPv4 address. + */ + if (ifa->ifa_dstaddr != NULL) + ifa->ifa_dstaddr->sa_family = AF_UNSPEC; ifa->ifa_rtrequest = stf_rtrequest; ifp->if_flags |= IFF_UP; break;