diff --git a/Documentation/Makefile b/Documentation/Makefile index fc759598c4c9..59d516b7afcb 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -1,4 +1,3 @@ subdir-y := accounting auxdisplay blackfin connector \ filesystems filesystems ia64 laptops misc-devices \ - networking pcmcia prctl ptp spi timers vDSO video4linux \ - watchdog + pcmcia prctl ptp spi timers vDSO video4linux watchdog diff --git a/Documentation/networking/Makefile b/Documentation/networking/Makefile deleted file mode 100644 index 4c5d7c485439..000000000000 --- a/Documentation/networking/Makefile +++ /dev/null @@ -1 +0,0 @@ -subdir-y := timestamping diff --git a/Documentation/networking/timestamping/.gitignore b/Documentation/networking/timestamping/.gitignore deleted file mode 100644 index 9e69e982fb38..000000000000 --- a/Documentation/networking/timestamping/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -timestamping -txtimestamp -hwtstamp_config diff --git a/Documentation/networking/timestamping/Makefile b/Documentation/networking/timestamping/Makefile deleted file mode 100644 index 8c20dfaa4d6e..000000000000 --- a/Documentation/networking/timestamping/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -# To compile, from the source root -# -# make headers_install -# make M=documentation - -# List of programs to build -hostprogs-y := hwtstamp_config timestamping txtimestamp - -# Tell kbuild to always build the programs -always := $(hostprogs-y) - -HOSTCFLAGS_timestamping.o += -I$(objtree)/usr/include -HOSTCFLAGS_txtimestamp.o += -I$(objtree)/usr/include -HOSTCFLAGS_hwtstamp_config.o += -I$(objtree)/usr/include diff --git a/Documentation/networking/timestamping/hwtstamp_config.c b/Documentation/networking/timestamping/hwtstamp_config.c deleted file mode 100644 index e8b685a7f15f..000000000000 --- a/Documentation/networking/timestamping/hwtstamp_config.c +++ /dev/null @@ -1,134 +0,0 @@ -/* Test program for SIOC{G,S}HWTSTAMP - * Copyright 2013 Solarflare Communications - * Author: Ben Hutchings - */ - -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -static int -lookup_value(const char **names, int size, const char *name) -{ - int value; - - for (value = 0; value < size; value++) - if (names[value] && strcasecmp(names[value], name) == 0) - return value; - - return -1; -} - -static const char * -lookup_name(const char **names, int size, int value) -{ - return (value >= 0 && value < size) ? names[value] : NULL; -} - -static void list_names(FILE *f, const char **names, int size) -{ - int value; - - for (value = 0; value < size; value++) - if (names[value]) - fprintf(f, " %s\n", names[value]); -} - -static const char *tx_types[] = { -#define TX_TYPE(name) [HWTSTAMP_TX_ ## name] = #name - TX_TYPE(OFF), - TX_TYPE(ON), - TX_TYPE(ONESTEP_SYNC) -#undef TX_TYPE -}; -#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0]))) - -static const char *rx_filters[] = { -#define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name - RX_FILTER(NONE), - RX_FILTER(ALL), - RX_FILTER(SOME), - RX_FILTER(PTP_V1_L4_EVENT), - RX_FILTER(PTP_V1_L4_SYNC), - RX_FILTER(PTP_V1_L4_DELAY_REQ), - RX_FILTER(PTP_V2_L4_EVENT), - RX_FILTER(PTP_V2_L4_SYNC), - RX_FILTER(PTP_V2_L4_DELAY_REQ), - RX_FILTER(PTP_V2_L2_EVENT), - RX_FILTER(PTP_V2_L2_SYNC), - RX_FILTER(PTP_V2_L2_DELAY_REQ), - RX_FILTER(PTP_V2_EVENT), - RX_FILTER(PTP_V2_SYNC), - RX_FILTER(PTP_V2_DELAY_REQ), -#undef RX_FILTER -}; -#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0]))) - -static void usage(void) -{ - fputs("Usage: hwtstamp_config if_name [tx_type rx_filter]\n" - "tx_type is any of (case-insensitive):\n", - stderr); - list_names(stderr, tx_types, N_TX_TYPES); - fputs("rx_filter is any of (case-insensitive):\n", stderr); - list_names(stderr, rx_filters, N_RX_FILTERS); -} - -int main(int argc, char **argv) -{ - struct ifreq ifr; - struct hwtstamp_config config; - const char *name; - int sock; - - if ((argc != 2 && argc != 4) || (strlen(argv[1]) >= IFNAMSIZ)) { - usage(); - return 2; - } - - if (argc == 4) { - config.flags = 0; - config.tx_type = lookup_value(tx_types, N_TX_TYPES, argv[2]); - config.rx_filter = lookup_value(rx_filters, N_RX_FILTERS, argv[3]); - if (config.tx_type < 0 || config.rx_filter < 0) { - usage(); - return 2; - } - } - - sock = socket(AF_INET, SOCK_DGRAM, 0); - if (sock < 0) { - perror("socket"); - return 1; - } - - strcpy(ifr.ifr_name, argv[1]); - ifr.ifr_data = (caddr_t)&config; - - if (ioctl(sock, (argc == 2) ? SIOCGHWTSTAMP : SIOCSHWTSTAMP, &ifr)) { - perror("ioctl"); - return 1; - } - - printf("flags = %#x\n", config.flags); - name = lookup_name(tx_types, N_TX_TYPES, config.tx_type); - if (name) - printf("tx_type = %s\n", name); - else - printf("tx_type = %d\n", config.tx_type); - name = lookup_name(rx_filters, N_RX_FILTERS, config.rx_filter); - if (name) - printf("rx_filter = %s\n", name); - else - printf("rx_filter = %d\n", config.rx_filter); - - return 0; -} diff --git a/Documentation/networking/timestamping/timestamping.c b/Documentation/networking/timestamping/timestamping.c deleted file mode 100644 index 5cdfd743447b..000000000000 --- a/Documentation/networking/timestamping/timestamping.c +++ /dev/null @@ -1,528 +0,0 @@ -/* - * This program demonstrates how the various time stamping features in - * the Linux kernel work. It emulates the behavior of a PTP - * implementation in stand-alone master mode by sending PTPv1 Sync - * multicasts once every second. It looks for similar packets, but - * beyond that doesn't actually implement PTP. - * - * Outgoing packets are time stamped with SO_TIMESTAMPING with or - * without hardware support. - * - * Incoming packets are time stamped with SO_TIMESTAMPING with or - * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and - * SO_TIMESTAMP[NS]. - * - * Copyright (C) 2009 Intel Corporation. - * Author: Patrick Ohly - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#ifndef SO_TIMESTAMPING -# define SO_TIMESTAMPING 37 -# define SCM_TIMESTAMPING SO_TIMESTAMPING -#endif - -#ifndef SO_TIMESTAMPNS -# define SO_TIMESTAMPNS 35 -#endif - -#ifndef SIOCGSTAMPNS -# define SIOCGSTAMPNS 0x8907 -#endif - -#ifndef SIOCSHWTSTAMP -# define SIOCSHWTSTAMP 0x89b0 -#endif - -static void usage(const char *error) -{ - if (error) - printf("invalid option: %s\n", error); - printf("timestamping interface option*\n\n" - "Options:\n" - " IP_MULTICAST_LOOP - looping outgoing multicasts\n" - " SO_TIMESTAMP - normal software time stamping, ms resolution\n" - " SO_TIMESTAMPNS - more accurate software time stamping\n" - " SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n" - " SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n" - " SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n" - " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n" - " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" - " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" - " SIOCGSTAMP - check last socket time stamp\n" - " SIOCGSTAMPNS - more accurate socket time stamp\n"); - exit(1); -} - -static void bail(const char *error) -{ - printf("%s: %s\n", error, strerror(errno)); - exit(1); -} - -static const unsigned char sync[] = { - 0x00, 0x01, 0x00, 0x01, - 0x5f, 0x44, 0x46, 0x4c, - 0x54, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x01, 0x01, - - /* fake uuid */ - 0x00, 0x01, - 0x02, 0x03, 0x04, 0x05, - - 0x00, 0x01, 0x00, 0x37, - 0x00, 0x00, 0x00, 0x08, - 0x00, 0x00, 0x00, 0x00, - 0x49, 0x05, 0xcd, 0x01, - 0x29, 0xb1, 0x8d, 0xb0, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x01, - - /* fake uuid */ - 0x00, 0x01, - 0x02, 0x03, 0x04, 0x05, - - 0x00, 0x00, 0x00, 0x37, - 0x00, 0x00, 0x00, 0x04, - 0x44, 0x46, 0x4c, 0x54, - 0x00, 0x00, 0xf0, 0x60, - 0x00, 0x01, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0xf0, 0x60, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x04, - 0x44, 0x46, 0x4c, 0x54, - 0x00, 0x01, - - /* fake uuid */ - 0x00, 0x01, - 0x02, 0x03, 0x04, 0x05, - - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00 -}; - -static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len) -{ - struct timeval now; - int res; - - res = sendto(sock, sync, sizeof(sync), 0, - addr, addr_len); - gettimeofday(&now, 0); - if (res < 0) - printf("%s: %s\n", "send", strerror(errno)); - else - printf("%ld.%06ld: sent %d bytes\n", - (long)now.tv_sec, (long)now.tv_usec, - res); -} - -static void printpacket(struct msghdr *msg, int res, - char *data, - int sock, int recvmsg_flags, - int siocgstamp, int siocgstampns) -{ - struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name; - struct cmsghdr *cmsg; - struct timeval tv; - struct timespec ts; - struct timeval now; - - gettimeofday(&now, 0); - - printf("%ld.%06ld: received %s data, %d bytes from %s, %zu bytes control messages\n", - (long)now.tv_sec, (long)now.tv_usec, - (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular", - res, - inet_ntoa(from_addr->sin_addr), - msg->msg_controllen); - for (cmsg = CMSG_FIRSTHDR(msg); - cmsg; - cmsg = CMSG_NXTHDR(msg, cmsg)) { - printf(" cmsg len %zu: ", cmsg->cmsg_len); - switch (cmsg->cmsg_level) { - case SOL_SOCKET: - printf("SOL_SOCKET "); - switch (cmsg->cmsg_type) { - case SO_TIMESTAMP: { - struct timeval *stamp = - (struct timeval *)CMSG_DATA(cmsg); - printf("SO_TIMESTAMP %ld.%06ld", - (long)stamp->tv_sec, - (long)stamp->tv_usec); - break; - } - case SO_TIMESTAMPNS: { - struct timespec *stamp = - (struct timespec *)CMSG_DATA(cmsg); - printf("SO_TIMESTAMPNS %ld.%09ld", - (long)stamp->tv_sec, - (long)stamp->tv_nsec); - break; - } - case SO_TIMESTAMPING: { - struct timespec *stamp = - (struct timespec *)CMSG_DATA(cmsg); - printf("SO_TIMESTAMPING "); - printf("SW %ld.%09ld ", - (long)stamp->tv_sec, - (long)stamp->tv_nsec); - stamp++; - /* skip deprecated HW transformed */ - stamp++; - printf("HW raw %ld.%09ld", - (long)stamp->tv_sec, - (long)stamp->tv_nsec); - break; - } - default: - printf("type %d", cmsg->cmsg_type); - break; - } - break; - case IPPROTO_IP: - printf("IPPROTO_IP "); - switch (cmsg->cmsg_type) { - case IP_RECVERR: { - struct sock_extended_err *err = - (struct sock_extended_err *)CMSG_DATA(cmsg); - printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s", - strerror(err->ee_errno), - err->ee_origin, -#ifdef SO_EE_ORIGIN_TIMESTAMPING - err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ? - "bounced packet" : "unexpected origin" -#else - "probably SO_EE_ORIGIN_TIMESTAMPING" -#endif - ); - if (res < sizeof(sync)) - printf(" => truncated data?!"); - else if (!memcmp(sync, data + res - sizeof(sync), - sizeof(sync))) - printf(" => GOT OUR DATA BACK (HURRAY!)"); - break; - } - case IP_PKTINFO: { - struct in_pktinfo *pktinfo = - (struct in_pktinfo *)CMSG_DATA(cmsg); - printf("IP_PKTINFO interface index %u", - pktinfo->ipi_ifindex); - break; - } - default: - printf("type %d", cmsg->cmsg_type); - break; - } - break; - default: - printf("level %d type %d", - cmsg->cmsg_level, - cmsg->cmsg_type); - break; - } - printf("\n"); - } - - if (siocgstamp) { - if (ioctl(sock, SIOCGSTAMP, &tv)) - printf(" %s: %s\n", "SIOCGSTAMP", strerror(errno)); - else - printf("SIOCGSTAMP %ld.%06ld\n", - (long)tv.tv_sec, - (long)tv.tv_usec); - } - if (siocgstampns) { - if (ioctl(sock, SIOCGSTAMPNS, &ts)) - printf(" %s: %s\n", "SIOCGSTAMPNS", strerror(errno)); - else - printf("SIOCGSTAMPNS %ld.%09ld\n", - (long)ts.tv_sec, - (long)ts.tv_nsec); - } -} - -static void recvpacket(int sock, int recvmsg_flags, - int siocgstamp, int siocgstampns) -{ - char data[256]; - struct msghdr msg; - struct iovec entry; - struct sockaddr_in from_addr; - struct { - struct cmsghdr cm; - char control[512]; - } control; - int res; - - memset(&msg, 0, sizeof(msg)); - msg.msg_iov = &entry; - msg.msg_iovlen = 1; - entry.iov_base = data; - entry.iov_len = sizeof(data); - msg.msg_name = (caddr_t)&from_addr; - msg.msg_namelen = sizeof(from_addr); - msg.msg_control = &control; - msg.msg_controllen = sizeof(control); - - res = recvmsg(sock, &msg, recvmsg_flags|MSG_DONTWAIT); - if (res < 0) { - printf("%s %s: %s\n", - "recvmsg", - (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular", - strerror(errno)); - } else { - printpacket(&msg, res, data, - sock, recvmsg_flags, - siocgstamp, siocgstampns); - } -} - -int main(int argc, char **argv) -{ - int so_timestamping_flags = 0; - int so_timestamp = 0; - int so_timestampns = 0; - int siocgstamp = 0; - int siocgstampns = 0; - int ip_multicast_loop = 0; - char *interface; - int i; - int enabled = 1; - int sock; - struct ifreq device; - struct ifreq hwtstamp; - struct hwtstamp_config hwconfig, hwconfig_requested; - struct sockaddr_in addr; - struct ip_mreq imr; - struct in_addr iaddr; - int val; - socklen_t len; - struct timeval next; - - if (argc < 2) - usage(0); - interface = argv[1]; - - for (i = 2; i < argc; i++) { - if (!strcasecmp(argv[i], "SO_TIMESTAMP")) - so_timestamp = 1; - else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS")) - so_timestampns = 1; - else if (!strcasecmp(argv[i], "SIOCGSTAMP")) - siocgstamp = 1; - else if (!strcasecmp(argv[i], "SIOCGSTAMPNS")) - siocgstampns = 1; - else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP")) - ip_multicast_loop = 1; - else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE; - else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE; - else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE; - else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE; - else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE; - else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE; - else - usage(argv[i]); - } - - sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - if (sock < 0) - bail("socket"); - - memset(&device, 0, sizeof(device)); - strncpy(device.ifr_name, interface, sizeof(device.ifr_name)); - if (ioctl(sock, SIOCGIFADDR, &device) < 0) - bail("getting interface IP address"); - - memset(&hwtstamp, 0, sizeof(hwtstamp)); - strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name)); - hwtstamp.ifr_data = (void *)&hwconfig; - memset(&hwconfig, 0, sizeof(hwconfig)); - hwconfig.tx_type = - (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ? - HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - hwconfig.rx_filter = - (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ? - HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE; - hwconfig_requested = hwconfig; - if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) { - if ((errno == EINVAL || errno == ENOTSUP) && - hwconfig_requested.tx_type == HWTSTAMP_TX_OFF && - hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE) - printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n"); - else - bail("SIOCSHWTSTAMP"); - } - printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n", - hwconfig_requested.tx_type, hwconfig.tx_type, - hwconfig_requested.rx_filter, hwconfig.rx_filter); - - /* bind to PTP port */ - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(INADDR_ANY); - addr.sin_port = htons(319 /* PTP event port */); - if (bind(sock, - (struct sockaddr *)&addr, - sizeof(struct sockaddr_in)) < 0) - bail("bind"); - - /* set multicast group for outgoing packets */ - inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */ - addr.sin_addr = iaddr; - imr.imr_multiaddr.s_addr = iaddr.s_addr; - imr.imr_interface.s_addr = - ((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr; - if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF, - &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0) - bail("set multicast"); - - /* join multicast group, loop our own packet */ - if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP, - &imr, sizeof(struct ip_mreq)) < 0) - bail("join multicast group"); - - if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP, - &ip_multicast_loop, sizeof(enabled)) < 0) { - bail("loop multicast"); - } - - /* set socket options for time stamping */ - if (so_timestamp && - setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, - &enabled, sizeof(enabled)) < 0) - bail("setsockopt SO_TIMESTAMP"); - - if (so_timestampns && - setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, - &enabled, sizeof(enabled)) < 0) - bail("setsockopt SO_TIMESTAMPNS"); - - if (so_timestamping_flags && - setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, - &so_timestamping_flags, - sizeof(so_timestamping_flags)) < 0) - bail("setsockopt SO_TIMESTAMPING"); - - /* request IP_PKTINFO for debugging purposes */ - if (setsockopt(sock, SOL_IP, IP_PKTINFO, - &enabled, sizeof(enabled)) < 0) - printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno)); - - /* verify socket options */ - len = sizeof(val); - if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0) - printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno)); - else - printf("SO_TIMESTAMP %d\n", val); - - if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0) - printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS", - strerror(errno)); - else - printf("SO_TIMESTAMPNS %d\n", val); - - if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) { - printf("%s: %s\n", "getsockopt SO_TIMESTAMPING", - strerror(errno)); - } else { - printf("SO_TIMESTAMPING %d\n", val); - if (val != so_timestamping_flags) - printf(" not the expected value %d\n", - so_timestamping_flags); - } - - /* send packets forever every five seconds */ - gettimeofday(&next, 0); - next.tv_sec = (next.tv_sec + 1) / 5 * 5; - next.tv_usec = 0; - while (1) { - struct timeval now; - struct timeval delta; - long delta_us; - int res; - fd_set readfs, errorfs; - - gettimeofday(&now, 0); - delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 + - (long)(next.tv_usec - now.tv_usec); - if (delta_us > 0) { - /* continue waiting for timeout or data */ - delta.tv_sec = delta_us / 1000000; - delta.tv_usec = delta_us % 1000000; - - FD_ZERO(&readfs); - FD_ZERO(&errorfs); - FD_SET(sock, &readfs); - FD_SET(sock, &errorfs); - printf("%ld.%06ld: select %ldus\n", - (long)now.tv_sec, (long)now.tv_usec, - delta_us); - res = select(sock + 1, &readfs, 0, &errorfs, &delta); - gettimeofday(&now, 0); - printf("%ld.%06ld: select returned: %d, %s\n", - (long)now.tv_sec, (long)now.tv_usec, - res, - res < 0 ? strerror(errno) : "success"); - if (res > 0) { - if (FD_ISSET(sock, &readfs)) - printf("ready for reading\n"); - if (FD_ISSET(sock, &errorfs)) - printf("has error\n"); - recvpacket(sock, 0, - siocgstamp, - siocgstampns); - recvpacket(sock, MSG_ERRQUEUE, - siocgstamp, - siocgstampns); - } - } else { - /* write one packet */ - sendpacket(sock, - (struct sockaddr *)&addr, - sizeof(addr)); - next.tv_sec += 5; - continue; - } - } - - return 0; -} diff --git a/Documentation/networking/timestamping/txtimestamp.c b/Documentation/networking/timestamping/txtimestamp.c deleted file mode 100644 index 5df07047ca86..000000000000 --- a/Documentation/networking/timestamping/txtimestamp.c +++ /dev/null @@ -1,549 +0,0 @@ -/* - * Copyright 2014 Google Inc. - * Author: willemb@google.com (Willem de Bruijn) - * - * Test software tx timestamping, including - * - * - SCHED, SND and ACK timestamps - * - RAW, UDP and TCP - * - IPv4 and IPv6 - * - various packet sizes (to test GSO and TSO) - * - * Consult the command line arguments for help on running - * the various testcases. - * - * This test requires a dummy TCP server. - * A simple `nc6 [-u] -l -p $DESTPORT` will do - * - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#define _GNU_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* command line parameters */ -static int cfg_proto = SOCK_STREAM; -static int cfg_ipproto = IPPROTO_TCP; -static int cfg_num_pkts = 4; -static int do_ipv4 = 1; -static int do_ipv6 = 1; -static int cfg_payload_len = 10; -static bool cfg_show_payload; -static bool cfg_do_pktinfo; -static bool cfg_loop_nodata; -static uint16_t dest_port = 9000; - -static struct sockaddr_in daddr; -static struct sockaddr_in6 daddr6; -static struct timespec ts_prev; - -static void __print_timestamp(const char *name, struct timespec *cur, - uint32_t key, int payload_len) -{ - if (!(cur->tv_sec | cur->tv_nsec)) - return; - - fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)", - name, cur->tv_sec, cur->tv_nsec / 1000, - key, payload_len); - - if ((ts_prev.tv_sec | ts_prev.tv_nsec)) { - int64_t cur_ms, prev_ms; - - cur_ms = (long) cur->tv_sec * 1000 * 1000; - cur_ms += cur->tv_nsec / 1000; - - prev_ms = (long) ts_prev.tv_sec * 1000 * 1000; - prev_ms += ts_prev.tv_nsec / 1000; - - fprintf(stderr, " (%+" PRId64 " us)", cur_ms - prev_ms); - } - - ts_prev = *cur; - fprintf(stderr, "\n"); -} - -static void print_timestamp_usr(void) -{ - struct timespec ts; - struct timeval tv; /* avoid dependency on -lrt */ - - gettimeofday(&tv, NULL); - ts.tv_sec = tv.tv_sec; - ts.tv_nsec = tv.tv_usec * 1000; - - __print_timestamp(" USR", &ts, 0, 0); -} - -static void print_timestamp(struct scm_timestamping *tss, int tstype, - int tskey, int payload_len) -{ - const char *tsname; - - switch (tstype) { - case SCM_TSTAMP_SCHED: - tsname = " ENQ"; - break; - case SCM_TSTAMP_SND: - tsname = " SND"; - break; - case SCM_TSTAMP_ACK: - tsname = " ACK"; - break; - default: - error(1, 0, "unknown timestamp type: %u", - tstype); - } - __print_timestamp(tsname, &tss->ts[0], tskey, payload_len); -} - -/* TODO: convert to check_and_print payload once API is stable */ -static void print_payload(char *data, int len) -{ - int i; - - if (!len) - return; - - if (len > 70) - len = 70; - - fprintf(stderr, "payload: "); - for (i = 0; i < len; i++) - fprintf(stderr, "%02hhx ", data[i]); - fprintf(stderr, "\n"); -} - -static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr) -{ - char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN]; - - fprintf(stderr, " pktinfo: ifindex=%u src=%s dst=%s\n", - ifindex, - saddr ? inet_ntop(family, saddr, sa, sizeof(sa)) : "unknown", - daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown"); -} - -static void __poll(int fd) -{ - struct pollfd pollfd; - int ret; - - memset(&pollfd, 0, sizeof(pollfd)); - pollfd.fd = fd; - ret = poll(&pollfd, 1, 100); - if (ret != 1) - error(1, errno, "poll"); -} - -static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) -{ - struct sock_extended_err *serr = NULL; - struct scm_timestamping *tss = NULL; - struct cmsghdr *cm; - int batch = 0; - - for (cm = CMSG_FIRSTHDR(msg); - cm && cm->cmsg_len; - cm = CMSG_NXTHDR(msg, cm)) { - if (cm->cmsg_level == SOL_SOCKET && - cm->cmsg_type == SCM_TIMESTAMPING) { - tss = (void *) CMSG_DATA(cm); - } else if ((cm->cmsg_level == SOL_IP && - cm->cmsg_type == IP_RECVERR) || - (cm->cmsg_level == SOL_IPV6 && - cm->cmsg_type == IPV6_RECVERR)) { - serr = (void *) CMSG_DATA(cm); - if (serr->ee_errno != ENOMSG || - serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) { - fprintf(stderr, "unknown ip error %d %d\n", - serr->ee_errno, - serr->ee_origin); - serr = NULL; - } - } else if (cm->cmsg_level == SOL_IP && - cm->cmsg_type == IP_PKTINFO) { - struct in_pktinfo *info = (void *) CMSG_DATA(cm); - print_pktinfo(AF_INET, info->ipi_ifindex, - &info->ipi_spec_dst, &info->ipi_addr); - } else if (cm->cmsg_level == SOL_IPV6 && - cm->cmsg_type == IPV6_PKTINFO) { - struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm); - print_pktinfo(AF_INET6, info6->ipi6_ifindex, - NULL, &info6->ipi6_addr); - } else - fprintf(stderr, "unknown cmsg %d,%d\n", - cm->cmsg_level, cm->cmsg_type); - - if (serr && tss) { - print_timestamp(tss, serr->ee_info, serr->ee_data, - payload_len); - serr = NULL; - tss = NULL; - batch++; - } - } - - if (batch > 1) - fprintf(stderr, "batched %d timestamps\n", batch); -} - -static int recv_errmsg(int fd) -{ - static char ctrl[1024 /* overprovision*/]; - static struct msghdr msg; - struct iovec entry; - static char *data; - int ret = 0; - - data = malloc(cfg_payload_len); - if (!data) - error(1, 0, "malloc"); - - memset(&msg, 0, sizeof(msg)); - memset(&entry, 0, sizeof(entry)); - memset(ctrl, 0, sizeof(ctrl)); - - entry.iov_base = data; - entry.iov_len = cfg_payload_len; - msg.msg_iov = &entry; - msg.msg_iovlen = 1; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = ctrl; - msg.msg_controllen = sizeof(ctrl); - - ret = recvmsg(fd, &msg, MSG_ERRQUEUE); - if (ret == -1 && errno != EAGAIN) - error(1, errno, "recvmsg"); - - if (ret >= 0) { - __recv_errmsg_cmsg(&msg, ret); - if (cfg_show_payload) - print_payload(data, cfg_payload_len); - } - - free(data); - return ret == -1; -} - -static void do_test(int family, unsigned int opt) -{ - char *buf; - int fd, i, val = 1, total_len; - - if (family == AF_INET6 && cfg_proto != SOCK_STREAM) { - /* due to lack of checksum generation code */ - fprintf(stderr, "test: skipping datagram over IPv6\n"); - return; - } - - total_len = cfg_payload_len; - if (cfg_proto == SOCK_RAW) { - total_len += sizeof(struct udphdr); - if (cfg_ipproto == IPPROTO_RAW) - total_len += sizeof(struct iphdr); - } - - buf = malloc(total_len); - if (!buf) - error(1, 0, "malloc"); - - fd = socket(family, cfg_proto, cfg_ipproto); - if (fd < 0) - error(1, errno, "socket"); - - if (cfg_proto == SOCK_STREAM) { - if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, - (char*) &val, sizeof(val))) - error(1, 0, "setsockopt no nagle"); - - if (family == PF_INET) { - if (connect(fd, (void *) &daddr, sizeof(daddr))) - error(1, errno, "connect ipv4"); - } else { - if (connect(fd, (void *) &daddr6, sizeof(daddr6))) - error(1, errno, "connect ipv6"); - } - } - - if (cfg_do_pktinfo) { - if (family == AF_INET6) { - if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO, - &val, sizeof(val))) - error(1, errno, "setsockopt pktinfo ipv6"); - } else { - if (setsockopt(fd, SOL_IP, IP_PKTINFO, - &val, sizeof(val))) - error(1, errno, "setsockopt pktinfo ipv4"); - } - } - - opt |= SOF_TIMESTAMPING_SOFTWARE | - SOF_TIMESTAMPING_OPT_CMSG | - SOF_TIMESTAMPING_OPT_ID; - if (cfg_loop_nodata) - opt |= SOF_TIMESTAMPING_OPT_TSONLY; - - if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, - (char *) &opt, sizeof(opt))) - error(1, 0, "setsockopt timestamping"); - - for (i = 0; i < cfg_num_pkts; i++) { - memset(&ts_prev, 0, sizeof(ts_prev)); - memset(buf, 'a' + i, total_len); - - if (cfg_proto == SOCK_RAW) { - struct udphdr *udph; - int off = 0; - - if (cfg_ipproto == IPPROTO_RAW) { - struct iphdr *iph = (void *) buf; - - memset(iph, 0, sizeof(*iph)); - iph->ihl = 5; - iph->version = 4; - iph->ttl = 2; - iph->daddr = daddr.sin_addr.s_addr; - iph->protocol = IPPROTO_UDP; - /* kernel writes saddr, csum, len */ - - off = sizeof(*iph); - } - - udph = (void *) buf + off; - udph->source = ntohs(9000); /* random spoof */ - udph->dest = ntohs(dest_port); - udph->len = ntohs(sizeof(*udph) + cfg_payload_len); - udph->check = 0; /* not allowed for IPv6 */ - } - - print_timestamp_usr(); - if (cfg_proto != SOCK_STREAM) { - if (family == PF_INET) - val = sendto(fd, buf, total_len, 0, (void *) &daddr, sizeof(daddr)); - else - val = sendto(fd, buf, total_len, 0, (void *) &daddr6, sizeof(daddr6)); - } else { - val = send(fd, buf, cfg_payload_len, 0); - } - if (val != total_len) - error(1, errno, "send"); - - /* wait for all errors to be queued, else ACKs arrive OOO */ - usleep(50 * 1000); - - __poll(fd); - - while (!recv_errmsg(fd)) {} - } - - if (close(fd)) - error(1, errno, "close"); - - free(buf); - usleep(400 * 1000); -} - -static void __attribute__((noreturn)) usage(const char *filepath) -{ - fprintf(stderr, "\nUsage: %s [options] hostname\n" - "\nwhere options are:\n" - " -4: only IPv4\n" - " -6: only IPv6\n" - " -h: show this message\n" - " -I: request PKTINFO\n" - " -l N: send N bytes at a time\n" - " -n: set no-payload option\n" - " -r: use raw\n" - " -R: use raw (IP_HDRINCL)\n" - " -p N: connect to port N\n" - " -u: use udp\n" - " -x: show payload (up to 70 bytes)\n", - filepath); - exit(1); -} - -static void parse_opt(int argc, char **argv) -{ - int proto_count = 0; - char c; - - while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) { - switch (c) { - case '4': - do_ipv6 = 0; - break; - case '6': - do_ipv4 = 0; - break; - case 'I': - cfg_do_pktinfo = true; - break; - case 'n': - cfg_loop_nodata = true; - break; - case 'r': - proto_count++; - cfg_proto = SOCK_RAW; - cfg_ipproto = IPPROTO_UDP; - break; - case 'R': - proto_count++; - cfg_proto = SOCK_RAW; - cfg_ipproto = IPPROTO_RAW; - break; - case 'u': - proto_count++; - cfg_proto = SOCK_DGRAM; - cfg_ipproto = IPPROTO_UDP; - break; - case 'l': - cfg_payload_len = strtoul(optarg, NULL, 10); - break; - case 'p': - dest_port = strtoul(optarg, NULL, 10); - break; - case 'x': - cfg_show_payload = true; - break; - case 'h': - default: - usage(argv[0]); - } - } - - if (!cfg_payload_len) - error(1, 0, "payload may not be nonzero"); - if (cfg_proto != SOCK_STREAM && cfg_payload_len > 1472) - error(1, 0, "udp packet might exceed expected MTU"); - if (!do_ipv4 && !do_ipv6) - error(1, 0, "pass -4 or -6, not both"); - if (proto_count > 1) - error(1, 0, "pass -r, -R or -u, not multiple"); - - if (optind != argc - 1) - error(1, 0, "missing required hostname argument"); -} - -static void resolve_hostname(const char *hostname) -{ - struct addrinfo *addrs, *cur; - int have_ipv4 = 0, have_ipv6 = 0; - - if (getaddrinfo(hostname, NULL, NULL, &addrs)) - error(1, errno, "getaddrinfo"); - - cur = addrs; - while (cur && !have_ipv4 && !have_ipv6) { - if (!have_ipv4 && cur->ai_family == AF_INET) { - memcpy(&daddr, cur->ai_addr, sizeof(daddr)); - daddr.sin_port = htons(dest_port); - have_ipv4 = 1; - } - else if (!have_ipv6 && cur->ai_family == AF_INET6) { - memcpy(&daddr6, cur->ai_addr, sizeof(daddr6)); - daddr6.sin6_port = htons(dest_port); - have_ipv6 = 1; - } - cur = cur->ai_next; - } - if (addrs) - freeaddrinfo(addrs); - - do_ipv4 &= have_ipv4; - do_ipv6 &= have_ipv6; -} - -static void do_main(int family) -{ - fprintf(stderr, "family: %s\n", - family == PF_INET ? "INET" : "INET6"); - - fprintf(stderr, "test SND\n"); - do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE); - - fprintf(stderr, "test ENQ\n"); - do_test(family, SOF_TIMESTAMPING_TX_SCHED); - - fprintf(stderr, "test ENQ + SND\n"); - do_test(family, SOF_TIMESTAMPING_TX_SCHED | - SOF_TIMESTAMPING_TX_SOFTWARE); - - if (cfg_proto == SOCK_STREAM) { - fprintf(stderr, "\ntest ACK\n"); - do_test(family, SOF_TIMESTAMPING_TX_ACK); - - fprintf(stderr, "\ntest SND + ACK\n"); - do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_TX_ACK); - - fprintf(stderr, "\ntest ENQ + SND + ACK\n"); - do_test(family, SOF_TIMESTAMPING_TX_SCHED | - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_TX_ACK); - } -} - -const char *sock_names[] = { NULL, "TCP", "UDP", "RAW" }; - -int main(int argc, char **argv) -{ - if (argc == 1) - usage(argv[0]); - - parse_opt(argc, argv); - resolve_hostname(argv[argc - 1]); - - fprintf(stderr, "protocol: %s\n", sock_names[cfg_proto]); - fprintf(stderr, "payload: %u\n", cfg_payload_len); - fprintf(stderr, "server port: %u\n", dest_port); - fprintf(stderr, "\n"); - - if (do_ipv4) - do_main(PF_INET); - if (do_ipv6) - do_main(PF_INET6); - - return 0; -} diff --git a/Makefile b/Makefile index 6b30551caee4..082f82471b51 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 167 +SUBLEVEL = 168 EXTRAVERSION = NAME = Blurry Fish Butt diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index a95499ea8706..fa1d41edce68 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -511,6 +511,9 @@ static void modem_pm(struct uart_port *port, unsigned int state, unsigned old) { struct modem_private_data *priv = port->private_data; + if (!priv) + return; + if (IS_ERR(priv->regulator)) return; diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c index 30768003f854..8c505284bc0c 100644 --- a/arch/arm/mach-omap2/prm44xx.c +++ b/arch/arm/mach-omap2/prm44xx.c @@ -344,7 +344,7 @@ static void omap44xx_prm_reconfigure_io_chain(void) * to occur, WAKEUPENABLE bits must be set in the pad mux registers, and * omap44xx_prm_reconfigure_io_chain() must be called. No return value. */ -static void __init omap44xx_prm_enable_io_wakeup(void) +static void omap44xx_prm_enable_io_wakeup(void) { s32 inst = omap4_prmst_get_prm_dev_inst(); diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index 877da1908234..98e2a5dbcfda 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -2724,7 +2724,6 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig (unsigned long int)(oper.indata + prev_ix), noinpages, 0, /* read access only for in data */ - 0, /* no force */ inpages, NULL); @@ -2740,8 +2739,7 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig current->mm, (unsigned long int)oper.cipher_outdata, nooutpages, - 1, /* write access for out data */ - 0, /* no force */ + FOLL_WRITE, /* write access for out data */ outpages, NULL); up_read(¤t->mm->mmap_sem); diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index 0c161ed6d18e..8205b456de7a 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -143,7 +143,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr, int ret; ret = get_user_pages(current, current->mm, virt_addr, - 1, VM_READ, 0, NULL, NULL); + 1, FOLL_WRITE, NULL, NULL); if (ret<=0) { #ifdef ERR_INJ_DEBUG printk("Virtual address %lx is not existing.\n",virt_addr); diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 349995d19c7f..e596e0a1cecc 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -303,7 +303,7 @@ slow_irqon: ret = get_user_pages_unlocked(current, mm, start, (end - start) >> PAGE_SHIFT, - write, 0, pages); + pages, write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 929c147e07b4..1b69bfdf59f9 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -344,6 +344,8 @@ static int __hw_perf_event_init(struct perf_event *event) break; case PERF_TYPE_HARDWARE: + if (is_sampling_event(event)) /* No sampling support */ + return -ENOENT; ev = attr->config; /* Count user space (problem-state) only */ if (!attr->exclude_user && attr->exclude_kernel) { diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 12bbf0e8478f..7ad41be8b373 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -242,7 +242,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, start += nr << PAGE_SHIFT; pages += nr; ret = get_user_pages_unlocked(current, mm, start, - nr_pages - nr, write, 0, pages); + nr_pages - nr, pages, write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) ret = (ret < 0) ? nr : ret + nr; diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c index e7af6a65baab..8c51a0e94854 100644 --- a/arch/sh/mm/gup.c +++ b/arch/sh/mm/gup.c @@ -258,7 +258,8 @@ slow_irqon: pages += nr; ret = get_user_pages_unlocked(current, mm, start, - (end - start) >> PAGE_SHIFT, write, 0, pages); + (end - start) >> PAGE_SHIFT, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 2e5c4fc2daa9..150f48303fb0 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -250,7 +250,8 @@ slow: pages += nr; ret = get_user_pages_unlocked(current, mm, start, - (end - start) >> PAGE_SHIFT, write, 0, pages); + (end - start) >> PAGE_SHIFT, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3a37cdbdfbaa..c048d0d70cc4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -765,7 +765,7 @@ struct kvm_x86_ops { int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ bool (*cpu_has_accelerated_tpr)(void); - bool (*cpu_has_high_real_mode_segbase)(void); + bool (*has_emulated_msr)(int index); void (*cpuid_update)(struct kvm_vcpu *vcpu); /* Create, but do not attach this VCPU */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d788b0cdc0ad..6f8eadf0681f 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -144,9 +144,14 @@ extern int __get_user_4(void); extern int __get_user_8(void); extern int __get_user_bad(void); #define __uaccess_begin() stac() #define __uaccess_end() clac() +#define __uaccess_begin_nospec() \ +({ \ + stac(); \ + barrier_nospec(); \ +}) /* * This is a type: either unsigned long, if the argument fits into * that type, or otherwise unsigned long long. @@ -423,9 +428,9 @@ do { \ ({ \ int __gu_err; \ unsigned long __gu_val; \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ __builtin_expect(__gu_err, 0); \ }) @@ -471,11 +476,15 @@ struct __large_struct { unsigned long buf[100]; }; */ #define uaccess_try do { \ current_thread_info()->uaccess_err = 0; \ __uaccess_begin(); \ barrier(); +#define uaccess_try_nospec do { \ + current_thread_info()->uaccess_err = 0; \ + __uaccess_begin_nospec(); \ + #define uaccess_catch(err) \ __uaccess_end(); \ (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ } while (0) @@ -537,7 +546,7 @@ struct __large_struct { unsigned long buf[100]; }; * get_user_ex(...); * } get_user_catch(err) */ -#define get_user_try uaccess_try +#define get_user_try uaccess_try_nospec #define get_user_catch(err) uaccess_catch(err) #define get_user_ex(x, ptr) do { \ @@ -572,12 +581,12 @@ extern void __cmpxchg_wrong_size(void) __typeof__(ptr) __uval = (uval); \ __typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __new = (new); \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ switch (size) { \ case 1: \ { \ asm volatile("\n" \ "1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n" \ "2:\n" \ "\t.section .fixup, \"ax\"\n" \ "3:\tmov %3, %0\n" \ "\tjmp 2b\n" \ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index f5dcb5204dcd..f575ee3aea5c 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -102,19 +102,19 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) switch (n) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u8 *)to, from, 1, ret, 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u16 *)to, from, 2, ret, 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u32 *)to, from, 4, ret, 4); __uaccess_end(); return ret; } } @@ -130,19 +130,19 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, switch (n) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u8 *)to, from, 1, ret, 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u16 *)to, from, 2, ret, 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u32 *)to, from, 4, ret, 4); __uaccess_end(); return ret; } } diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index d83a55b95a48..dc2d00e7ced3 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -57,49 +57,49 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u8 *)dst, (u8 __user *)src, ret, "b", "b", "=q", 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u16 *)dst, (u16 __user *)src, ret, "w", "w", "=r", 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u32 *)dst, (u32 __user *)src, ret, "l", "k", "=r", 4); __uaccess_end(); return ret; case 8: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 8); __uaccess_end(); return ret; case 10: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 10); if (likely(!ret)) __get_user_asm(*(u16 *)(8 + (char *)dst), (u16 __user *)(8 + (char __user *)src), ret, "w", "w", "=r", 2); __uaccess_end(); return ret; case 16: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 16); if (likely(!ret)) __get_user_asm(*(u64 *)(8 + (char *)dst), (u64 __user *)(8 + (char __user *)src), ret, "q", "", "=r", 8); __uaccess_end(); return ret; default: return copy_user_generic(dst, (__force void *)src, size); @@ -192,47 +192,47 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) switch (size) { case 1: { u8 tmp; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(tmp, (u8 __user *)src, ret, "b", "b", "=q", 1); if (likely(!ret)) __put_user_asm(tmp, (u8 __user *)dst, ret, "b", "b", "iq", 1); __uaccess_end(); return ret; } case 2: { u16 tmp; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(tmp, (u16 __user *)src, ret, "w", "w", "=r", 2); if (likely(!ret)) __put_user_asm(tmp, (u16 __user *)dst, ret, "w", "w", "ir", 2); __uaccess_end(); return ret; } case 4: { u32 tmp; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(tmp, (u32 __user *)src, ret, "l", "k", "=r", 4); if (likely(!ret)) __put_user_asm(tmp, (u32 __user *)dst, ret, "l", "k", "ir", 4); __uaccess_end(); return ret; } case 8: { u64 tmp; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(tmp, (u64 __user *)src, ret, "q", "", "=r", 8); if (likely(!ret)) __put_user_asm(tmp, (u64 __user *)dst, ret, "q", "", "er", 8); __uaccess_end(); return ret; } default: diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b12c0287d6cf..e8b46f575306 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -693,7 +693,8 @@ static void init_speculation_control(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); - if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD)) + if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) || + cpu_has(c, X86_FEATURE_VIRT_SSBD)) set_cpu_cap(c, X86_FEATURE_SSBD); if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 338d13d4fd2f..b857bb9f6f23 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -341,6 +341,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); + /* cpuid 0x80000008.ebx */ + const u32 kvm_cpuid_8000_0008_ebx_x86_features = + F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); + /* cpuid 0xC0000001.edx */ const u32 kvm_supported_word5_x86_features = F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | @@ -358,6 +362,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, const u32 kvm_supported_word10_x86_features = F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves; + /* cpuid 7.0.edx*/ + const u32 kvm_cpuid_7_0_edx_x86_features = + F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES); + /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -435,11 +443,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, cpuid_mask(&entry->ebx, 9); // TSC_ADJUST is emulated entry->ebx |= F(TSC_ADJUST); - } else + entry->edx &= kvm_cpuid_7_0_edx_x86_features; + cpuid_mask(&entry->edx, CPUID_7_EDX); + } else { entry->ebx = 0; + entry->edx = 0; + } entry->eax = 0; entry->ecx = 0; - entry->edx = 0; break; } case 9: @@ -583,7 +594,21 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, if (!g_phys_as) g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); - entry->ebx = entry->edx = 0; + entry->edx = 0; + /* + * IBRS, IBPB and VIRT_SSBD aren't necessarily present in + * hardware cpuid + */ + if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) + entry->ebx |= F(AMD_IBPB); + if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) + entry->ebx |= F(AMD_IBRS); + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + entry->ebx |= F(VIRT_SSBD); + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + entry->ebx |= F(VIRT_SSBD); break; } case 0x80000019: diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index d1534feefcfe..72f159f4d456 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -159,6 +159,46 @@ static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) return best && (best->edx & bit(X86_FEATURE_RDTSCP)); } +static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best && (best->ebx & bit(X86_FEATURE_AMD_IBPB))) + return true; + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); +} + +static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best && (best->ebx & bit(X86_FEATURE_AMD_IBRS))) + return true; + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SPEC_CTRL_SSBD))); +} + +static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); +} + +static inline bool guest_cpuid_has_virt_ssbd(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + return best && (best->ebx & bit(X86_FEATURE_VIRT_SSBD)); +} + + + /* * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a1afd80a68aa..3c70f6c76d3a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -56,7 +56,7 @@ #define APIC_BUS_CYCLE_NS 1 /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ -#define apic_debug(fmt, arg...) +#define apic_debug(fmt, arg...) do {} while (0) #define APIC_LVT_NUM 6 /* 14 is the version for Xeon and Pentium 8.4.8*/ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index df7827a981dd..ecdf724da371 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -147,6 +148,14 @@ struct vcpu_svm { u64 gs_base; } host; + u64 spec_ctrl; + /* + * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be + * translated into the appropriate L2_CFG bits on the host to + * perform speculative control. + */ + u64 virt_spec_ctrl; + u32 *msrpm; ulong nmi_iret_rip; @@ -182,6 +191,8 @@ static const struct svm_direct_access_msrs { { .index = MSR_CSTAR, .always = true }, { .index = MSR_SYSCALL_MASK, .always = true }, #endif + { .index = MSR_IA32_SPEC_CTRL, .always = false }, + { .index = MSR_IA32_PRED_CMD, .always = false }, { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, @@ -411,6 +422,7 @@ struct svm_cpu_data { struct kvm_ldttss_desc *tss_desc; struct page *save_area; + struct vmcb *current_vmcb; }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); @@ -762,6 +774,25 @@ static bool valid_msr_intercept(u32 index) return false; } +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) +{ + u8 bit_write; + unsigned long tmp; + u32 offset; + u32 *msrpm; + + msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: + to_svm(vcpu)->msrpm; + + offset = svm_msrpm_offset(msr); + bit_write = 2 * (msr & 0x0f) + 1; + tmp = msrpm[offset]; + + BUG_ON(offset == MSR_INVALID); + + return !!test_bit(bit_write, &tmp); +} + static void set_msr_interception(u32 *msrpm, unsigned msr, int read, int write) { @@ -1120,6 +1151,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; + svm->spec_ctrl = 0; + svm->virt_spec_ctrl = 0; + if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; @@ -1210,11 +1244,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); + /* + * The vmcb page can be recycled, causing a false negative in + * svm_vcpu_load(). So do a full IBPB now. + */ + indirect_branch_prediction_barrier(); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); int i; if (unlikely(cpu != vcpu->cpu)) { @@ -1239,6 +1279,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio); } } + if (sd->current_vmcb != svm->vmcb) { + sd->current_vmcb = svm->vmcb; + indirect_branch_prediction_barrier(); + } } static void svm_vcpu_put(struct kvm_vcpu *vcpu) @@ -3051,6 +3095,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_VM_CR: msr_info->data = svm->nested.vm_cr_msr; break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_spec_ctrl(vcpu)) + return 1; + + msr_info->data = svm->spec_ctrl; + break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_virt_ssbd(vcpu)) + return 1; + + msr_info->data = svm->virt_spec_ctrl; + break; case MSR_IA32_UCODE_REV: msr_info->data = 0x01000065; break; @@ -3125,6 +3183,59 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr); break; + case MSR_IA32_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has_spec_ctrl(vcpu)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + return 1; + + svm->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_svm_vmrun_msrpm. + * We update the L1 MSR bit as well since it will end up + * touching the MSR anyway now. + */ + set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); + break; + case MSR_IA32_PRED_CMD: + if (!msr->host_initiated && + !guest_cpuid_has_ibpb(vcpu)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + if (is_guest_mode(vcpu)) + break; + set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); + break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has_virt_ssbd(vcpu)) + return 1; + + if (data & ~SPEC_CTRL_SSBD) + return 1; + + svm->virt_spec_ctrl = data; + break; case MSR_STAR: svm->vmcb->save.star = data; break; @@ -3811,6 +3922,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); + asm volatile ( "push %%" _ASM_BP "; \n\t" "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" @@ -3915,6 +4034,26 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif #endif + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); + reload_tss(vcpu); local_irq_disable(); @@ -4015,7 +4154,7 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_high_real_mode_segbase(void) +static bool svm_has_emulated_msr(int index) { return true; } @@ -4299,7 +4438,7 @@ static struct kvm_x86_ops svm_x86_ops = { .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, - .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase, + .has_emulated_msr = svm_has_emulated_msr, .vcpu_create = svm_create_vcpu, .vcpu_free = svm_free_vcpu, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c5a4b1978cbf..e4b5fd72ca24 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include "trace.h" @@ -109,6 +110,14 @@ static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +#define MSR_TYPE_R 1 +#define MSR_TYPE_W 2 +#define MSR_TYPE_RW 3 + +#define MSR_BITMAP_MODE_X2APIC 1 +#define MSR_BITMAP_MODE_X2APIC_APICV 2 +#define MSR_BITMAP_MODE_LM 4 + #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) @@ -172,7 +181,6 @@ module_param(ple_window_max, int, S_IRUGO); extern const ulong vmx_return; #define NR_AUTOLOAD_MSRS 8 -#define VMCS02_POOL_SIZE 1 struct vmcs { u32 revision_id; @@ -189,6 +197,7 @@ struct loaded_vmcs { struct vmcs *vmcs; int cpu; int launched; + unsigned long *msr_bitmap; struct list_head loaded_vmcss_on_cpu_link; }; @@ -205,7 +214,7 @@ struct shared_msr_entry { * stored in guest memory specified by VMPTRLD, but is opaque to the guest, * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. * More than one of these structures may exist, if L1 runs multiple L2 guests. - * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the + * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the * underlying hardware which will be used to run L2. * This structure is packed to ensure that its layout is identical across * machines (necessary for live migration). @@ -384,13 +393,6 @@ struct __packed vmcs12 { */ #define VMCS12_SIZE 0x1000 -/* Used to remember the last vmcs02 used for some recently used vmcs12s */ -struct vmcs02_list { - struct list_head list; - gpa_t vmptr; - struct loaded_vmcs vmcs02; -}; - /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -412,16 +414,16 @@ struct nested_vmx { */ bool sync_shadow_vmcs; - /* vmcs02_list cache of VMCSs recently used to run L2 guests */ - struct list_head vmcs02_pool; - int vmcs02_num; u64 vmcs01_tsc_offset; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; + + struct loaded_vmcs vmcs02; + /* - * Guest pages referred to in vmcs02 with host-physical pointers, so - * we must keep them pinned while L2 runs. + * Guest pages referred to in the vmcs02 with host-physical + * pointers, so we must keep them pinned while L2 runs. */ struct page *apic_access_page; struct page *virtual_apic_page; @@ -531,6 +533,7 @@ struct vcpu_vmx { unsigned long host_rsp; u8 fail; bool nmi_known_unmasked; + u8 msr_bitmap_mode; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -542,6 +545,10 @@ struct vcpu_vmx { u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; #endif + + u64 arch_capabilities; + u64 spec_ctrl; + u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; /* @@ -889,6 +896,9 @@ static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -908,11 +918,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); static unsigned long *vmx_io_bitmap_a; static unsigned long *vmx_io_bitmap_b; -static unsigned long *vmx_msr_bitmap_legacy; -static unsigned long *vmx_msr_bitmap_longmode; -static unsigned long *vmx_msr_bitmap_legacy_x2apic; -static unsigned long *vmx_msr_bitmap_longmode_x2apic; -static unsigned long *vmx_msr_bitmap_nested; static unsigned long *vmx_vmread_bitmap; static unsigned long *vmx_vmwrite_bitmap; @@ -1689,6 +1694,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) vmcs_write32(EXCEPTION_BITMAP, eb); } +/* + * Check if MSR is intercepted for currently loaded MSR bitmap. + */ +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + +/* + * Check if MSR is intercepted for L01 MSR bitmap. + */ +static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit) { @@ -2074,6 +2125,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; vmcs_load(vmx->loaded_vmcs->vmcs); + indirect_branch_prediction_barrier(); } if (vmx->loaded_vmcs->cpu != cpu) { @@ -2353,27 +2405,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) vmx->guest_msrs[from] = tmp; } -static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) -{ - unsigned long *msr_bitmap; - - if (is_guest_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_nested; - else if (vcpu->arch.apic_base & X2APIC_ENABLE) { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode_x2apic; - else - msr_bitmap = vmx_msr_bitmap_legacy_x2apic; - } else { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode; - else - msr_bitmap = vmx_msr_bitmap_legacy; - } - - vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); -} - /* * Set up the vmcs to automatically save and restore system * msrs. Don't touch the 64-bit msrs if the guest is in legacy @@ -2414,7 +2445,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx->save_nmsrs = save_nmsrs; if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(&vmx->vcpu); + vmx_update_msr_bitmap(&vmx->vcpu); } /* @@ -2828,6 +2859,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_spec_ctrl(vcpu)) + return 1; + + msr_info->data = to_vmx(vcpu)->spec_ctrl; + break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has_arch_capabilities(vcpu)) + return 1; + msr_info->data = to_vmx(vcpu)->arch_capabilities; + break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); break; @@ -2927,6 +2971,68 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr_info); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_spec_ctrl(vcpu)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) + return 1; + + vmx->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. We update the vmcs01 here for L1 as well + * since it will end up touching the MSR anyway now. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_RW); + break; + case MSR_IA32_PRED_CMD: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibpb(vcpu)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, + MSR_TYPE_W); + break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + vmx->arch_capabilities = data; + break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -3352,11 +3458,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) return vmcs; } -static struct vmcs *alloc_vmcs(void) -{ - return alloc_vmcs_cpu(raw_smp_processor_id()); -} - static void free_vmcs(struct vmcs *vmcs) { free_pages((unsigned long)vmcs, vmcs_config.order); @@ -3372,6 +3473,34 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; + if (loaded_vmcs->msr_bitmap) + free_page((unsigned long)loaded_vmcs->msr_bitmap); +} + +static struct vmcs *alloc_vmcs(void) +{ + return alloc_vmcs_cpu(raw_smp_processor_id()); +} + +static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) +{ + loaded_vmcs->vmcs = alloc_vmcs(); + if (!loaded_vmcs->vmcs) + return -ENOMEM; + + loaded_vmcs_init(loaded_vmcs); + + if (cpu_has_vmx_msr_bitmap()) { + loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!loaded_vmcs->msr_bitmap) + goto out_vmcs; + memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); + } + return 0; + +out_vmcs: + free_loaded_vmcs(loaded_vmcs); + return -ENOMEM; } static void free_kvm_area(void) @@ -4370,10 +4499,8 @@ static void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } -#define MSR_TYPE_R 1 -#define MSR_TYPE_W 2 -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4407,8 +4534,8 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, } } -static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4488,37 +4615,76 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, } } -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) +static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type, bool value) { - if (!longmode_only) - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, - msr, MSR_TYPE_R | MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, - msr, MSR_TYPE_R | MSR_TYPE_W); + if (value) + vmx_enable_intercept_for_msr(msr_bitmap, msr, type); + else + vmx_disable_intercept_for_msr(msr_bitmap, msr, type); } -static void vmx_enable_intercept_msr_read_x2apic(u32 msr) +static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) { - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_R); - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_R); + u8 mode = 0; + + if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) { + mode |= MSR_BITMAP_MODE_X2APIC; + if (enable_apicv) + mode |= MSR_BITMAP_MODE_X2APIC_APICV; + } + + if (is_long_mode(vcpu)) + mode |= MSR_BITMAP_MODE_LM; + + return mode; } -static void vmx_disable_intercept_msr_read_x2apic(u32 msr) +#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) + +static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, + u8 mode) { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_R); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_R); + int msr; + + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; + } + + if (mode & MSR_BITMAP_MODE_X2APIC) { + /* + * TPR reads and writes can be virtualized even if virtual interrupt + * delivery is not in use. + */ + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); + if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_ID), MSR_TYPE_R); + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); + } + } } -static void vmx_disable_intercept_msr_write_x2apic(u32 msr) +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_W); + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; + u8 mode = vmx_msr_bitmap_mode(vcpu); + u8 changed = mode ^ vmx->msr_bitmap_mode; + + if (!changed) + return; + + vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, + !(mode & MSR_BITMAP_MODE_LM)); + + if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) + vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); + + vmx->msr_bitmap_mode = mode; } static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu) @@ -4526,6 +4692,28 @@ static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu) return enable_apicv && lapic_in_kernel(vcpu); } +static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + gfn_t gfn; + + /* + * Don't need to mark the APIC access page dirty; it is never + * written to by the CPU during APIC virtualization. + */ + + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { + gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } + + if (nested_cpu_has_posted_intr(vmcs12)) { + gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } +} + + static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4533,18 +4721,15 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) void *vapic_page; u16 status; - if (vmx->nested.pi_desc && - vmx->nested.pi_pending) { - vmx->nested.pi_pending = false; - if (!pi_test_and_clear_on(vmx->nested.pi_desc)) - return; - - max_irr = find_last_bit( - (unsigned long *)vmx->nested.pi_desc->pir, 256); + if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) + return; - if (max_irr == 256) - return; + vmx->nested.pi_pending = false; + if (!pi_test_and_clear_on(vmx->nested.pi_desc)) + return; + max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); + if (max_irr != 256) { vapic_page = kmap(vmx->nested.virtual_apic_page); __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); kunmap(vmx->nested.virtual_apic_page); @@ -4556,6 +4741,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) vmcs_write16(GUEST_INTR_STATUS, status); } } + + nested_mark_vmcs12_pages_dirty(vcpu); } static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) @@ -4818,7 +5005,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); } if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); + vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ @@ -4890,6 +5077,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); @@ -4918,6 +5107,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u64 cr0; vmx->rmode.vm86_active = 0; + vmx->spec_ctrl = 0; vmx->soft_vnmi_blocked = 0; @@ -6159,7 +6349,7 @@ static void wakeup_handler(void) static __init int hardware_setup(void) { - int r = -ENOMEM, i, msr; + int r = -ENOMEM, i; rdmsrl_safe(MSR_EFER, &host_efer); @@ -6174,38 +6364,13 @@ static __init int hardware_setup(void) if (!vmx_io_bitmap_b) goto out; - vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy) - goto out1; - - vmx_msr_bitmap_legacy_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy_x2apic) - goto out2; - - vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode) - goto out3; - - vmx_msr_bitmap_longmode_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode_x2apic) - goto out4; - - if (nested) { - vmx_msr_bitmap_nested = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_nested) - goto out5; - } - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmread_bitmap) - goto out6; + goto out1; vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmwrite_bitmap) - goto out7; + goto out2; memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); @@ -6214,14 +6379,9 @@ static __init int hardware_setup(void) memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - if (nested) - memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE); - if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; - goto out8; + goto out3; } if (boot_cpu_has(X86_FEATURE_NX)) @@ -6287,38 +6447,8 @@ static __init int hardware_setup(void) kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; } - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - - memcpy(vmx_msr_bitmap_legacy_x2apic, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic, - vmx_msr_bitmap_longmode, PAGE_SIZE); - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - if (enable_apicv) { - for (msr = 0x800; msr <= 0x8ff; msr++) - vmx_disable_intercept_msr_read_x2apic(msr); - - /* According SDM, in x2apic mode, the whole id reg is used. - * But in KVM, it only use the highest eight bits. Need to - * intercept it */ - vmx_enable_intercept_msr_read_x2apic(0x802); - /* TMCCT */ - vmx_enable_intercept_msr_read_x2apic(0x839); - /* TPR */ - vmx_disable_intercept_msr_write_x2apic(0x808); - /* EOI */ - vmx_disable_intercept_msr_write_x2apic(0x80b); - /* SELF-IPI */ - vmx_disable_intercept_msr_write_x2apic(0x83f); - } - if (enable_ept) { kvm_mmu_set_mask_ptes(0ull, (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, @@ -6349,21 +6479,10 @@ static __init int hardware_setup(void) return alloc_kvm_area(); -out8: - free_page((unsigned long)vmx_vmwrite_bitmap); -out7: - free_page((unsigned long)vmx_vmread_bitmap); -out6: - if (nested) - free_page((unsigned long)vmx_msr_bitmap_nested); -out5: - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); -out4: - free_page((unsigned long)vmx_msr_bitmap_longmode); out3: - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); + free_page((unsigned long)vmx_vmwrite_bitmap); out2: - free_page((unsigned long)vmx_msr_bitmap_legacy); + free_page((unsigned long)vmx_vmread_bitmap); out1: free_page((unsigned long)vmx_io_bitmap_b); out: @@ -6374,16 +6493,10 @@ out: static __exit void hardware_unsetup(void) { - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); - free_page((unsigned long)vmx_msr_bitmap_legacy); - free_page((unsigned long)vmx_msr_bitmap_longmode); free_page((unsigned long)vmx_io_bitmap_b); free_page((unsigned long)vmx_io_bitmap_a); free_page((unsigned long)vmx_vmwrite_bitmap); free_page((unsigned long)vmx_vmread_bitmap); - if (nested) - free_page((unsigned long)vmx_msr_bitmap_nested); free_kvm_area(); } @@ -6426,93 +6539,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) return handle_nop(vcpu); } -/* - * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. - * We could reuse a single VMCS for all the L2 guests, but we also want the - * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this - * allows keeping them loaded on the processor, and in the future will allow - * optimizations where prepare_vmcs02 doesn't need to set all the fields on - * every entry if they never change. - * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE - * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. - * - * The following functions allocate and free a vmcs02 in this pool. - */ - -/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ -static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmx->nested.current_vmptr) { - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { - /* Recycle the least recently used VMCS. */ - item = list_entry(vmx->nested.vmcs02_pool.prev, - struct vmcs02_list, list); - item->vmptr = vmx->nested.current_vmptr; - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - /* Create a new VMCS */ - item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); - if (!item) - return NULL; - item->vmcs02.vmcs = alloc_vmcs(); - if (!item->vmcs02.vmcs) { - kfree(item); - return NULL; - } - loaded_vmcs_init(&item->vmcs02); - item->vmptr = vmx->nested.current_vmptr; - list_add(&(item->list), &(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num++; - return &item->vmcs02; -} - -/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ -static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmptr) { - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - return; - } -} - -/* - * Free all VMCSs saved for this vcpu, except the one pointed by - * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs - * must be &vmx->vmcs01. - */ -static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item, *n; - - WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); - list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { - /* - * Something will leak if the above WARN triggers. Better than - * a use-after-free. - */ - if (vmx->loaded_vmcs == &item->vmcs02) - continue; - - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - } -} - /* * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), * set the success or error code of an emulated VMX instruction, as specified @@ -6786,6 +6812,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) struct vmcs *shadow_vmcs; const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + int r; /* The Intel VMX Instruction Reference lists a bunch of bits that * are prerequisite to running VMXON, most notably cr4.VMXE must be @@ -6825,10 +6852,14 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } + r = alloc_loaded_vmcs(&vmx->nested.vmcs02); + if (r < 0) + goto out_vmcs02; + if (enable_shadow_vmcs) { shadow_vmcs = alloc_vmcs(); if (!shadow_vmcs) - return -ENOMEM; + goto out_shadow_vmcs; /* mark vmcs as shadow */ shadow_vmcs->revision_id |= (1u << 31); /* init shadow vmcs */ @@ -6836,9 +6867,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) vmx->nested.current_shadow_vmcs = shadow_vmcs; } - INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num = 0; - hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; @@ -6850,6 +6878,12 @@ static int handle_vmon(struct kvm_vcpu *vcpu) skip_emulated_instruction(vcpu); nested_vmx_succeed(vcpu); return 1; + +out_shadow_vmcs: + free_loaded_vmcs(&vmx->nested.vmcs02); + +out_vmcs02: + return -ENOMEM; } /* @@ -6921,7 +6955,7 @@ static void free_nested(struct vcpu_vmx *vmx) nested_release_vmcs12(vmx); if (enable_shadow_vmcs) free_vmcs(vmx->nested.current_shadow_vmcs); - /* Unpin physical memory we referred to in current vmcs02 */ + /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { nested_release_page(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; @@ -6937,7 +6971,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.pi_desc = NULL; } - nested_free_all_saved_vmcss(vmx); + free_loaded_vmcs(&vmx->nested.vmcs02); } /* Emulate the VMXOFF instruction */ @@ -6971,8 +7005,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) vmptr + offsetof(struct vmcs12, launch_state), &zero, sizeof(zero)); - nested_free_vmcs02(vmx, vmptr); - skip_emulated_instruction(vcpu); nested_vmx_succeed(vcpu); return 1; @@ -7757,6 +7789,19 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) vmcs_read32(VM_EXIT_INTR_ERROR_CODE), KVM_ISA_VMX); + /* + * The host physical addresses of some pages of guest memory + * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC + * Page). The CPU may write to these pages via their host + * physical address while L2 is running, bypassing any + * address-translation-based dirty tracking (e.g. EPT write + * protection). + * + * Mark them dirty on every exit from L2 to prevent them from + * getting out of sync with dirty tracking. + */ + nested_mark_vmcs12_pages_dirty(vcpu); + if (vmx->nested.nested_run_pending) return false; @@ -8244,7 +8289,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); } static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) @@ -8413,9 +8458,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) local_irq_enable(); } -static bool vmx_has_high_real_mode_segbase(void) +static bool vmx_has_emulated_msr(int index) { - return enable_unrestricted_guest || emulate_invalid_guest_state; + switch (index) { + case MSR_IA32_SMBASE: + /* + * We cannot do SMM unless we can run the guest in big + * real mode. + */ + return enable_unrestricted_guest || emulate_invalid_guest_state; + case MSR_AMD64_VIRT_SPEC_CTRL: + /* This is AMD only. */ + return false; + default: + return true; + } } static bool vmx_mpx_supported(void) @@ -8607,7 +8664,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) atomic_switch_perf_msrs(vmx); debugctlmsr = get_debugctlmsr(); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); + vmx->__launched = vmx->loaded_vmcs->launched; + asm( /* Store host registers */ "push %%" _ASM_DX "; push %%" _ASM_BP ";" @@ -8725,6 +8791,26 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + + x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); + /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -8824,6 +8910,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { int err; struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + unsigned long *msr_bitmap; int cpu; if (!vmx) @@ -8856,16 +8943,24 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx->guest_msrs) goto free_pml; - vmx->loaded_vmcs = &vmx->vmcs01; - vmx->loaded_vmcs->vmcs = alloc_vmcs(); - if (!vmx->loaded_vmcs->vmcs) - goto free_msrs; if (!vmm_exclusive) kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id()))); - loaded_vmcs_init(vmx->loaded_vmcs); + err = alloc_loaded_vmcs(&vmx->vmcs01); if (!vmm_exclusive) kvm_cpu_vmxoff(); + if (err < 0) + goto free_msrs; + + msr_bitmap = vmx->vmcs01.msr_bitmap; + vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); + vmx->msr_bitmap_mode = 0; + vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); vmx->vcpu.cpu = cpu; @@ -9248,9 +9343,26 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, { int msr; struct page *page; - unsigned long *msr_bitmap; + unsigned long *msr_bitmap_l1; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; + /* + * pred_cmd & spec_ctrl are trying to verify two things: + * + * 1. L0 gave a permission to L1 to actually passthrough the MSR. This + * ensures that we do not accidentally generate an L02 MSR bitmap + * from the L12 MSR bitmap that is too permissive. + * 2. That L1 or L2s have actually used the MSR. This avoids + * unnecessarily merging of the bitmap if the MSR is unused. This + * works properly because we only update the L01 MSR bitmap lazily. + * So even if L0 should pass L1 these MSRs, the L01 bitmap is only + * updated to reflect this when L1 (or its L2s) actually write to + * the MSR. + */ + bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); + bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); - if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && + !pred_cmd && !spec_ctrl) return false; page = nested_get_page(vcpu, vmcs12->msr_bitmap); @@ -9258,59 +9370,46 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, WARN_ON(1); return false; } - msr_bitmap = (unsigned long *)kmap(page); + msr_bitmap_l1 = (unsigned long *)kmap(page); + + memset(msr_bitmap_l0, 0xff, PAGE_SIZE); if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { if (nested_cpu_has_apic_reg_virt(vmcs12)) for (msr = 0x800; msr <= 0x8ff; msr++) nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, msr, MSR_TYPE_R); - /* TPR is allowed */ - nested_vmx_disable_intercept_for_msr(msr_bitmap, - vmx_msr_bitmap_nested, + + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_TASKPRI >> 4), MSR_TYPE_R | MSR_TYPE_W); + if (nested_cpu_has_vid(vmcs12)) { - /* EOI and self-IPI are allowed */ nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_EOI >> 4), MSR_TYPE_W); nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_SELF_IPI >> 4), MSR_TYPE_W); } - } else { - /* - * Enable reading intercept of all the x2apic - * MSRs. We should not rely on vmcs12 to do any - * optimizations here, it may have been modified - * by L1. - */ - for (msr = 0x800; msr <= 0x8ff; msr++) - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - msr, - MSR_TYPE_R); - - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_TASKPRI >> 4), - MSR_TYPE_W); - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_EOI >> 4), - MSR_TYPE_W); - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_SELF_IPI >> 4), - MSR_TYPE_W); } + + if (spec_ctrl) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_R | MSR_TYPE_W); + + if (pred_cmd) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PRED_CMD, + MSR_TYPE_W); + kunmap(page); nested_release_page_clean(page); @@ -9729,10 +9828,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) } if (cpu_has_vmx_msr_bitmap() && - exec_control & CPU_BASED_USE_MSR_BITMAPS) { - nested_vmx_merge_msr_bitmap(vcpu, vmcs12); - /* MSR_BITMAP will be set by following vmx_set_efer. */ - } else + exec_control & CPU_BASED_USE_MSR_BITMAPS && + nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) + ; /* MSR_BITMAP will be set by following vmx_set_efer. */ + else exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; /* @@ -9784,6 +9883,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) else vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); + if (cpu_has_vmx_msr_bitmap()) + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); + if (enable_vpid) { /* * There is no direct mapping between vpid02 and vpid12, the @@ -9876,7 +9978,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); int cpu; - struct loaded_vmcs *vmcs02; bool ia32e; u32 msr_entry_idx; @@ -10016,10 +10117,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * the nested entry. */ - vmcs02 = nested_get_current_vmcs02(vmx); - if (!vmcs02) - return -ENOMEM; - enter_guest_mode(vcpu); vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); @@ -10028,7 +10125,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); cpu = get_cpu(); - vmx->loaded_vmcs = vmcs02; + vmx->loaded_vmcs = &vmx->nested.vmcs02; vmx_vcpu_put(vcpu); vmx_vcpu_load(vcpu, cpu); vcpu->cpu = cpu; @@ -10489,7 +10586,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_write64(GUEST_IA32_DEBUGCTL, 0); if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) @@ -10540,10 +10637,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); vmx_segment_cache_clear(vmx); - /* if no vmcs02 cache requested, remove the one we used */ - if (VMCS02_POOL_SIZE == 0) - nested_free_vmcs02(vmx, vmx->nested.current_vmptr); - load_vmcs12_host_state(vcpu, vmcs12); /* Update TSC_OFFSET if TSC was changed while L2 ran */ @@ -10871,7 +10964,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .hardware_enable = hardware_enable, .hardware_disable = hardware_disable, .cpu_has_accelerated_tpr = report_flexpriority, - .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, + .has_emulated_msr = vmx_has_emulated_msr, .vcpu_create = vmx_create_vcpu, .vcpu_free = vmx_free_vcpu, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e6ab034f0bc7..aa1a0277a678 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -961,6 +961,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, + MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES }; static unsigned num_msrs_to_save; @@ -984,6 +985,7 @@ static u32 emulated_msrs[] = { MSR_IA32_MCG_STATUS, MSR_IA32_MCG_CTL, MSR_IA32_SMBASE, + MSR_AMD64_VIRT_SPEC_CTRL, }; static unsigned num_emulated_msrs; @@ -2583,7 +2585,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * fringe case that is not enabled except via specific settings * of the module parameters. */ - r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); + r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE); break; case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; @@ -4072,14 +4074,8 @@ static void kvm_init_msr_list(void) num_msrs_to_save = j; for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { - switch (emulated_msrs[i]) { - case MSR_IA32_SMBASE: - if (!kvm_x86_ops->cpu_has_high_real_mode_segbase()) - continue; - break; - default: - break; - } + if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) + continue; if (j < i) emulated_msrs[j] = emulated_msrs[i]; diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 91d93b95bd86..0a6fcae404f8 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -570,12 +570,12 @@ do { \ unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n) { - stac(); + __uaccess_begin_nospec(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel(to, from, n); - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_to_user_ll); @@ -583,12 +583,12 @@ EXPORT_SYMBOL(__copy_to_user_ll); unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n) { - stac(); + __uaccess_begin_nospec(); if (movsl_is_ok(to, from, n)) __copy_user_zeroing(to, from, n); else n = __copy_user_zeroing_intel(to, from, n); - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_from_user_ll); @@ -596,13 +596,13 @@ EXPORT_SYMBOL(__copy_from_user_ll); unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, unsigned long n) { - stac(); + __uaccess_begin_nospec(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel((void __user *)to, (const void *)from, n); - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nozero); @@ -610,7 +610,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nozero); unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, unsigned long n) { - stac(); + __uaccess_begin_nospec(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && cpu_has_xmm2) n = __copy_user_zeroing_intel_nocache(to, from, n); @@ -619,7 +619,7 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, #else __copy_user_zeroing(to, from, n); #endif - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache); @@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { - stac(); + __uaccess_begin_nospec(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && cpu_has_xmm2) n = __copy_user_intel_nocache(to, from, n); @@ -636,7 +636,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr #else __copy_user(to, from, n); #endif - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index ae9a37bf1371..7d2542ad346a 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -388,7 +388,7 @@ slow_irqon: ret = get_user_pages_unlocked(current, mm, start, (end - start) >> PAGE_SHIFT, - write, 0, pages); + pages, write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 7ed47b1e6f42..7e94fc6f608a 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -536,10 +536,9 @@ static int mpx_resolve_fault(long __user *addr, int write) { long gup_ret; int nr_pages = 1; - int force = 0; gup_ret = get_user_pages(current, current->mm, (unsigned long)addr, - nr_pages, write, force, NULL, NULL); + nr_pages, write ? FOLL_WRITE : 0, NULL, NULL); /* * get_user_pages() returns number of pages gotten. * 0 means we failed to fault in and get anything, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e40a6d8b0b92..062c23125b2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -496,9 +496,13 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) int r; int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); + unsigned int flags = 0; enum dma_data_direction direction = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + if (write) + flags |= FOLL_WRITE; + if (current->mm != gtt->usermm) return -EPERM; @@ -519,7 +523,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) struct page **pages = ttm->pages + pinned; r = get_user_pages(current, current->mm, userptr, num_pages, - write, 0, pages, NULL); + flags, pages, NULL); if (r < 0) goto release_pages; diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 21085f669e21..b19ba1792607 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -968,9 +968,21 @@ static int get_clock(void *i2c_priv) { struct ast_i2c_chan *i2c = i2c_priv; struct ast_private *ast = i2c->dev->dev_private; - uint32_t val; + uint32_t val, val2, count, pass; + + count = 0; + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + do { + val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + if (val == val2) { + pass++; + } else { + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + } + } while ((pass < 5) && (count++ < 0x10000)); - val = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4; return val & 1 ? 1 : 0; } @@ -978,9 +990,21 @@ static int get_data(void *i2c_priv) { struct ast_i2c_chan *i2c = i2c_priv; struct ast_private *ast = i2c->dev->dev_private; - uint32_t val; + uint32_t val, val2, count, pass; + + count = 0; + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + do { + val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + if (val == val2) { + pass++; + } else { + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + } + } while ((pass < 5) && (count++ < 0x10000)); - val = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5; return val & 1 ? 1 : 0; } @@ -993,7 +1017,7 @@ static void set_clock(void *i2c_priv, int clock) for (i = 0; i < 0x10000; i++) { ujcrb7 = ((clock & 0x01) ? 0 : 1); - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xfe, ujcrb7); + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf4, ujcrb7); jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x01); if (ujcrb7 == jtemp) break; @@ -1009,7 +1033,7 @@ static void set_data(void *i2c_priv, int data) for (i = 0; i < 0x10000; i++) { ujcrb7 = ((data & 0x01) ? 0 : 1) << 2; - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xfb, ujcrb7); + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf1, ujcrb7); jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x04); if (ujcrb7 == jtemp) break; diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index c17efdb238a6..639ea28808e2 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -471,7 +471,8 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev, goto err_free; } - ret = get_vaddr_frames(start, npages, true, true, g2d_userptr->vec); + ret = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, + g2d_userptr->vec); if (ret != npages) { DRM_ERROR("failed to get user pages from userptr.\n"); if (ret < 0) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 359fe2b8bb8a..b02113b57d51 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -581,13 +581,17 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) pvec = drm_malloc_ab(npages, sizeof(struct page *)); if (pvec != NULL) { struct mm_struct *mm = obj->userptr.mm->mm; + unsigned int flags = 0; + + if (!obj->userptr.read_only) + flags |= FOLL_WRITE; down_read(&mm->mmap_sem); while (pinned < npages) { ret = get_user_pages(work->task, mm, obj->userptr.ptr + pinned * PAGE_SIZE, npages - pinned, - !obj->userptr.read_only, 0, + flags, pvec + pinned, NULL); if (ret < 0) break; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index d684e2b79d2b..0c380fe77382 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -557,7 +557,7 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm) struct page **pages = ttm->pages + pinned; r = get_user_pages(current, current->mm, userptr, num_pages, - write, 0, pages, NULL); + write ? FOLL_WRITE : 0, pages, NULL); if (r < 0) goto release_pages; diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c index d0cbd5ecd7f0..4459cb32d1fe 100644 --- a/drivers/gpu/drm/via/via_dmablit.c +++ b/drivers/gpu/drm/via/via_dmablit.c @@ -242,8 +242,8 @@ via_lock_all_dma_pages(drm_via_sg_info_t *vsg, drm_via_dmablit_t *xfer) ret = get_user_pages(current, current->mm, (unsigned long)xfer->mem_addr, vsg->num_pages, - (vsg->direction == DMA_FROM_DEVICE), - 0, vsg->pages, NULL); + (vsg->direction == DMA_FROM_DEVICE) ? FOLL_WRITE : 0, + vsg->pages, NULL); up_read(¤t->mm->mmap_sem); if (ret != vsg->num_pages) { diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index 9ac6e1673375..1f291b344178 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -273,7 +273,7 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg, break; case INA2XX_CURRENT: /* signed register, result in mA */ - val = regval * data->current_lsb_uA; + val = (s16)regval * data->current_lsb_uA; val = DIV_ROUND_CLOSEST(val, 1000); break; case INA2XX_CALIBRATION: diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c index 49276bbdac3d..1bb80f992aa8 100644 --- a/drivers/hwmon/w83795.c +++ b/drivers/hwmon/w83795.c @@ -1691,7 +1691,7 @@ store_sf_setup(struct device *dev, struct device_attribute *attr, * somewhere else in the code */ #define SENSOR_ATTR_TEMP(index) { \ - SENSOR_ATTR_2(temp##index##_type, S_IRUGO | (index < 4 ? S_IWUSR : 0), \ + SENSOR_ATTR_2(temp##index##_type, S_IRUGO | (index < 5 ? S_IWUSR : 0), \ show_temp_mode, store_temp_mode, NOT_USED, index - 1), \ SENSOR_ATTR_2(temp##index##_input, S_IRUGO, show_temp, \ NULL, TEMP_READ, index - 1), \ diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 98fd9a594841..8762eac47570 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -95,6 +95,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, DEFINE_DMA_ATTRS(attrs); struct scatterlist *sg, *sg_list_start; int need_release = 0; + unsigned int gup_flags = FOLL_WRITE; if (dmasync) dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); @@ -177,6 +178,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (ret) goto out; + if (!umem->writable) + gup_flags |= FOLL_FORCE; + need_release = 1; sg_list_start = umem->sg_head.sgl; @@ -184,7 +188,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, ret = get_user_pages(current, current->mm, cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), - 1, !umem->writable, page_list, vma_list); + gup_flags, page_list, vma_list); if (ret < 0) goto out; diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 40becdb3196e..738ccfee7cae 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -527,6 +527,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, u64 off; int j, k, ret = 0, start_idx, npages = 0; u64 base_virt_addr; + unsigned int flags = 0; if (access_mask == 0) return -EINVAL; @@ -556,6 +557,9 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, goto out_put_task; } + if (access_mask & ODP_WRITE_ALLOWED_BIT) + flags |= FOLL_WRITE; + start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT; k = start_idx; @@ -574,8 +578,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, */ npages = get_user_pages(owning_process, owning_mm, user_virt, gup_num_pages, - access_mask & ODP_WRITE_ALLOWED_BIT, 0, - local_page_list, NULL); + flags, local_page_list, NULL); up_read(&owning_mm->mmap_sem); if (npages < 0) diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 7d2e42dd6926..8676685dbf3d 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -472,8 +472,8 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, goto out; } - ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0, - pages, NULL); + ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, + FOLL_WRITE, pages, NULL); if (ret < 0) goto out; diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index ab1588ae1c85..75c3f0dffe63 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -68,7 +68,8 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages, for (got = 0; got < num_pages; got += ret) { ret = get_user_pages(current, current->mm, start_page + got * PAGE_SIZE, - num_pages - got, 1, 1, + num_pages - got, + FOLL_WRITE | FOLL_FORCE, p + got, NULL); if (ret < 0) goto bail_release; diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 645a5f6e6c88..7f0d75e29441 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -113,6 +113,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, int flags; dma_addr_t pa; DEFINE_DMA_ATTRS(attrs); + unsigned int gup_flags; if (dmasync) dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); @@ -140,6 +141,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, flags = IOMMU_READ | IOMMU_CACHE; flags |= (writable) ? IOMMU_WRITE : 0; + gup_flags = FOLL_WRITE; + gup_flags |= (writable) ? 0 : FOLL_FORCE; cur_base = addr & PAGE_MASK; ret = 0; @@ -147,7 +150,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, ret = get_user_pages(current, current->mm, cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof(struct page *)), - 1, !writable, page_list, NULL); + gup_flags, page_list, NULL); if (ret < 0) goto out; diff --git a/drivers/media/dvb-frontends/ascot2e.c b/drivers/media/dvb-frontends/ascot2e.c index f770f6a2c987..3ea9edc8cdbe 100644 --- a/drivers/media/dvb-frontends/ascot2e.c +++ b/drivers/media/dvb-frontends/ascot2e.c @@ -155,7 +155,9 @@ static int ascot2e_write_regs(struct ascot2e_priv *priv, static int ascot2e_write_reg(struct ascot2e_priv *priv, u8 reg, u8 val) { - return ascot2e_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return ascot2e_write_regs(priv, reg, &tmp, 1); } static int ascot2e_read_regs(struct ascot2e_priv *priv, diff --git a/drivers/media/dvb-frontends/cxd2841er.c b/drivers/media/dvb-frontends/cxd2841er.c index 107853b0fddd..bde77671a37c 100644 --- a/drivers/media/dvb-frontends/cxd2841er.c +++ b/drivers/media/dvb-frontends/cxd2841er.c @@ -241,7 +241,9 @@ static int cxd2841er_write_regs(struct cxd2841er_priv *priv, static int cxd2841er_write_reg(struct cxd2841er_priv *priv, u8 addr, u8 reg, u8 val) { - return cxd2841er_write_regs(priv, addr, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return cxd2841er_write_regs(priv, addr, reg, &tmp, 1); } static int cxd2841er_read_regs(struct cxd2841er_priv *priv, diff --git a/drivers/media/dvb-frontends/horus3a.c b/drivers/media/dvb-frontends/horus3a.c index 000606af70f7..f770ab72a8e3 100644 --- a/drivers/media/dvb-frontends/horus3a.c +++ b/drivers/media/dvb-frontends/horus3a.c @@ -89,7 +89,9 @@ static int horus3a_write_regs(struct horus3a_priv *priv, static int horus3a_write_reg(struct horus3a_priv *priv, u8 reg, u8 val) { - return horus3a_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return horus3a_write_regs(priv, reg, &tmp, 1); } static int horus3a_enter_power_save(struct horus3a_priv *priv) diff --git a/drivers/media/dvb-frontends/itd1000.c b/drivers/media/dvb-frontends/itd1000.c index cadcae4cff89..ac9d2591bb6f 100644 --- a/drivers/media/dvb-frontends/itd1000.c +++ b/drivers/media/dvb-frontends/itd1000.c @@ -99,8 +99,9 @@ static int itd1000_read_reg(struct itd1000_state *state, u8 reg) static inline int itd1000_write_reg(struct itd1000_state *state, u8 r, u8 v) { - int ret = itd1000_write_regs(state, r, &v, 1); - state->shadow[r] = v; + u8 tmp = v; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + int ret = itd1000_write_regs(state, r, &tmp, 1); + state->shadow[r] = tmp; return ret; } diff --git a/drivers/media/dvb-frontends/mt312.c b/drivers/media/dvb-frontends/mt312.c index c36e6764eead..c44188271028 100644 --- a/drivers/media/dvb-frontends/mt312.c +++ b/drivers/media/dvb-frontends/mt312.c @@ -142,7 +142,10 @@ static inline int mt312_readreg(struct mt312_state *state, static inline int mt312_writereg(struct mt312_state *state, const enum mt312_reg_addr reg, const u8 val) { - return mt312_write(state, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + + return mt312_write(state, reg, &tmp, 1); } static inline u32 mt312_div(u32 a, u32 b) diff --git a/drivers/media/dvb-frontends/stb0899_drv.c b/drivers/media/dvb-frontends/stb0899_drv.c index 756650f154ab..ad9b7d4f8d95 100644 --- a/drivers/media/dvb-frontends/stb0899_drv.c +++ b/drivers/media/dvb-frontends/stb0899_drv.c @@ -552,7 +552,8 @@ int stb0899_write_regs(struct stb0899_state *state, unsigned int reg, u8 *data, int stb0899_write_reg(struct stb0899_state *state, unsigned int reg, u8 data) { - return stb0899_write_regs(state, reg, &data, 1); + u8 tmp = data; + return stb0899_write_regs(state, reg, &tmp, 1); } /* diff --git a/drivers/media/dvb-frontends/stb6100.c b/drivers/media/dvb-frontends/stb6100.c index 4ef8a5c7003e..44fac2570034 100644 --- a/drivers/media/dvb-frontends/stb6100.c +++ b/drivers/media/dvb-frontends/stb6100.c @@ -226,12 +226,14 @@ static int stb6100_write_reg_range(struct stb6100_state *state, u8 buf[], int st static int stb6100_write_reg(struct stb6100_state *state, u8 reg, u8 data) { + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + if (unlikely(reg >= STB6100_NUMREGS)) { dprintk(verbose, FE_ERROR, 1, "Invalid register offset 0x%x", reg); return -EREMOTEIO; } - data = (data & stb6100_template[reg].mask) | stb6100_template[reg].set; - return stb6100_write_reg_range(state, &data, reg, 1); + tmp = (tmp & stb6100_template[reg].mask) | stb6100_template[reg].set; + return stb6100_write_reg_range(state, &tmp, reg, 1); } diff --git a/drivers/media/dvb-frontends/stv0367.c b/drivers/media/dvb-frontends/stv0367.c index 44cb73f68af6..ddd0d778ad6e 100644 --- a/drivers/media/dvb-frontends/stv0367.c +++ b/drivers/media/dvb-frontends/stv0367.c @@ -804,7 +804,9 @@ int stv0367_writeregs(struct stv0367_state *state, u16 reg, u8 *data, int len) static int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data) { - return stv0367_writeregs(state, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv0367_writeregs(state, reg, &tmp, 1); } static u8 stv0367_readreg(struct stv0367_state *state, u16 reg) diff --git a/drivers/media/dvb-frontends/stv090x.c b/drivers/media/dvb-frontends/stv090x.c index 25bdf6e0f963..f0377e2b341b 100644 --- a/drivers/media/dvb-frontends/stv090x.c +++ b/drivers/media/dvb-frontends/stv090x.c @@ -761,7 +761,9 @@ static int stv090x_write_regs(struct stv090x_state *state, unsigned int reg, u8 static int stv090x_write_reg(struct stv090x_state *state, unsigned int reg, u8 data) { - return stv090x_write_regs(state, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv090x_write_regs(state, reg, &tmp, 1); } static int stv090x_i2c_gate_ctrl(struct stv090x_state *state, int enable) diff --git a/drivers/media/dvb-frontends/stv6110x.c b/drivers/media/dvb-frontends/stv6110x.c index e66154e5c1d7..45d14869e7b8 100644 --- a/drivers/media/dvb-frontends/stv6110x.c +++ b/drivers/media/dvb-frontends/stv6110x.c @@ -97,7 +97,9 @@ static int stv6110x_write_regs(struct stv6110x_state *stv6110x, int start, u8 da static int stv6110x_write_reg(struct stv6110x_state *stv6110x, u8 reg, u8 data) { - return stv6110x_write_regs(stv6110x, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv6110x_write_regs(stv6110x, reg, &tmp, 1); } static int stv6110x_init(struct dvb_frontend *fe) diff --git a/drivers/media/dvb-frontends/zl10039.c b/drivers/media/dvb-frontends/zl10039.c index ee09ec26c553..b273e4fd8024 100644 --- a/drivers/media/dvb-frontends/zl10039.c +++ b/drivers/media/dvb-frontends/zl10039.c @@ -138,7 +138,9 @@ static inline int zl10039_writereg(struct zl10039_state *state, const enum zl10039_reg_addr reg, const u8 val) { - return zl10039_write(state, reg, &val, 1); + const u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return zl10039_write(state, reg, &tmp, 1); } static int zl10039_init(struct dvb_frontend *fe) diff --git a/drivers/media/pci/ivtv/ivtv-udma.c b/drivers/media/pci/ivtv/ivtv-udma.c index 24152accc66c..8729fdebef8f 100644 --- a/drivers/media/pci/ivtv/ivtv-udma.c +++ b/drivers/media/pci/ivtv/ivtv-udma.c @@ -125,7 +125,8 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr, /* Get user pages for DMA Xfer */ err = get_user_pages_unlocked(current, current->mm, - user_dma.uaddr, user_dma.page_count, 0, 1, dma->map); + user_dma.uaddr, user_dma.page_count, dma->map, + FOLL_FORCE); if (user_dma.page_count != err) { IVTV_DEBUG_WARN("failed to map user pages, returned %d instead of %d\n", diff --git a/drivers/media/pci/ivtv/ivtv-yuv.c b/drivers/media/pci/ivtv/ivtv-yuv.c index 2b8e7b2f2b86..9cd995f418e0 100644 --- a/drivers/media/pci/ivtv/ivtv-yuv.c +++ b/drivers/media/pci/ivtv/ivtv-yuv.c @@ -76,13 +76,13 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma, /* Get user pages for DMA Xfer */ y_pages = get_user_pages_unlocked(current, current->mm, - y_dma.uaddr, y_dma.page_count, 0, 1, - &dma->map[0]); + y_dma.uaddr, y_dma.page_count, + &dma->map[0], FOLL_FORCE); uv_pages = 0; /* silence gcc. value is set and consumed only if: */ if (y_pages == y_dma.page_count) { uv_pages = get_user_pages_unlocked(current, current->mm, - uv_dma.uaddr, uv_dma.page_count, 0, 1, - &dma->map[y_pages]); + uv_dma.uaddr, uv_dma.page_count, + &dma->map[y_pages], FOLL_FORCE); } if (y_pages != y_dma.page_count || uv_pages != uv_dma.page_count) { diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c index 70c28d19ea04..596359576109 100644 --- a/drivers/media/platform/omap/omap_vout.c +++ b/drivers/media/platform/omap/omap_vout.c @@ -214,7 +214,7 @@ static int omap_vout_get_userptr(struct videobuf_buffer *vb, u32 virtp, if (!vec) return -ENOMEM; - ret = get_vaddr_frames(virtp, 1, true, false, vec); + ret = get_vaddr_frames(virtp, 1, FOLL_WRITE, vec); if (ret != 1) { frame_vector_destroy(vec); return -EINVAL; diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index f669cedca8bd..f74a74d91b9e 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -156,6 +156,7 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma, { unsigned long first, last; int err, rw = 0; + unsigned int flags = FOLL_FORCE; dma->direction = direction; switch (dma->direction) { @@ -178,13 +179,15 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma, if (NULL == dma->pages) return -ENOMEM; + if (rw == READ) + flags |= FOLL_WRITE; + dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n", data, size, dma->nr_pages); err = get_user_pages(current, current->mm, data & PAGE_MASK, dma->nr_pages, - rw == READ, 1, /* force */ - dma->pages, NULL); + flags, dma->pages, NULL); if (err != dma->nr_pages) { dma->nr_pages = (err >= 0) ? err : 0; diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c index 3c3b517f1d1c..1cd322e939c7 100644 --- a/drivers/media/v4l2-core/videobuf2-memops.c +++ b/drivers/media/v4l2-core/videobuf2-memops.c @@ -42,6 +42,10 @@ struct frame_vector *vb2_create_framevec(unsigned long start, unsigned long first, last; unsigned long nr; struct frame_vector *vec; + unsigned int flags = FOLL_FORCE; + + if (write) + flags |= FOLL_WRITE; first = start >> PAGE_SHIFT; last = (start + length - 1) >> PAGE_SHIFT; @@ -49,7 +53,7 @@ struct frame_vector *vb2_create_framevec(unsigned long start, vec = frame_vector_create(nr); if (!vec) return ERR_PTR(-ENOMEM); - ret = get_vaddr_frames(start & PAGE_MASK, nr, write, true, vec); + ret = get_vaddr_frames(start & PAGE_MASK, nr, flags, vec); if (ret < 0) goto out_destroy; /* We accept only complete set of PFNs */ diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index 8bd63128d536..71c69e1c4ac0 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -1398,8 +1398,7 @@ retry: mm, (u64)addr, nr_pages, - !!(prot & SCIF_PROT_WRITE), - 0, + (prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0, pinned_pages->pages, NULL); up_write(&mm->mmap_sem); diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index f74fc0ca2ef9..e6b723c6a2af 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -199,7 +199,7 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma, *pageshift = PAGE_SHIFT; #endif if (get_user_pages - (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0) + (current, current->mm, vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0) return -EFAULT; *paddr = page_to_phys(page); put_page(page); diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 16baaafed26c..cbdeb54eab51 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -1090,6 +1090,9 @@ static void nic_remove(struct pci_dev *pdev) { struct nicpf *nic = pci_get_drvdata(pdev); + if (!nic) + return; + if (nic->flags & NIC_SRIOV_ENABLED) pci_disable_sriov(pdev); diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 253f8ed0537a..60c727b0b7ab 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -919,10 +919,8 @@ static int hip04_mac_probe(struct platform_device *pdev) } ret = register_netdev(ndev); - if (ret) { - free_netdev(ndev); + if (ret) goto alloc_fail; - } return 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index ffd2e74e5638..dcd718ce13d5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1429,7 +1429,9 @@ static s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, *autoneg = false; if (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) { + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1) { *speed = IXGBE_LINK_SPEED_1GB_FULL; return 0; } diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index deae10d7426d..9b588251f2a7 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -578,6 +578,7 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) struct cp_private *cp; int handled = 0; u16 status; + u16 mask; if (unlikely(dev == NULL)) return IRQ_NONE; @@ -585,6 +586,10 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) spin_lock(&cp->lock); + mask = cpr16(IntrMask); + if (!mask) + goto out_unlock; + status = cpr16(IntrStatus); if (!status || (status == 0xFFFF)) goto out_unlock; diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 3920c3eb6006..df6063faad2e 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -821,37 +821,49 @@ static int rocker_tlv_put(struct rocker_desc_info *desc_info, static int rocker_tlv_put_u8(struct rocker_desc_info *desc_info, int attrtype, u8 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u8), &value); + u8 tmp = value; /* work around GCC PR81715 */ + + return rocker_tlv_put(desc_info, attrtype, sizeof(u8), &tmp); } static int rocker_tlv_put_u16(struct rocker_desc_info *desc_info, int attrtype, u16 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u16), &value); + u16 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(u16), &tmp); } static int rocker_tlv_put_be16(struct rocker_desc_info *desc_info, int attrtype, __be16 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(__be16), &value); + __be16 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(__be16), &tmp); } static int rocker_tlv_put_u32(struct rocker_desc_info *desc_info, int attrtype, u32 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u32), &value); + u32 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(u32), &tmp); } static int rocker_tlv_put_be32(struct rocker_desc_info *desc_info, int attrtype, __be32 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(__be32), &value); + __be32 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(__be32), &tmp); } static int rocker_tlv_put_u64(struct rocker_desc_info *desc_info, int attrtype, u64 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u64), &value); + u64 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(u64), &tmp); } static struct rocker_tlv * diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1f2f25a71d18..70f26b30729c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1265,20 +1265,17 @@ static int gen10g_resume(struct phy_device *phydev) static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) { - phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES | - PHY_10BT_FEATURES); - switch (max_speed) { - default: - return -ENOTSUPP; - case SPEED_1000: - phydev->supported |= PHY_1000BT_FEATURES; + case SPEED_10: + phydev->supported &= ~PHY_100BT_FEATURES; /* fall through */ case SPEED_100: - phydev->supported |= PHY_100BT_FEATURES; - /* fall through */ - case SPEED_10: - phydev->supported |= PHY_10BT_FEATURES; + phydev->supported &= ~PHY_1000BT_FEATURES; + break; + case SPEED_1000: + break; + default: + return -ENOTSUPP; } return 0; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 5ac0b850d6b1..fd9ff9eff237 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1475,9 +1475,9 @@ static void tun_setup(struct net_device *dev) */ static int tun_validate(struct nlattr *tb[], struct nlattr *data[]) { - if (!data) - return 0; - return -EINVAL; + /* NL_SET_ERR_MSG(extack, + "tun/tap creation via rtnetlink is not supported."); */ + return -EOPNOTSUPP; } static struct rtnl_link_ops tun_link_ops __read_mostly = { diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 6ed26baca0e5..7af8479acb98 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -1035,8 +1035,14 @@ int wmi_set_ie(struct wil6210_priv *wil, u8 type, u16 ie_len, const void *ie) }; int rc; u16 len = sizeof(struct wmi_set_appie_cmd) + ie_len; - struct wmi_set_appie_cmd *cmd = kzalloc(len, GFP_KERNEL); + struct wmi_set_appie_cmd *cmd; + if (len < ie_len) { + rc = -EINVAL; + goto out; + } + + cmd = kzalloc(len, GFP_KERNEL); if (!cmd) { rc = -ENOMEM; goto out; diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index 03054c0e7689..3c3e8115f73d 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -187,30 +187,25 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) struct scsi_device *SDev; struct scsi_sense_hdr sshdr; int result, err = 0, retries = 0; - struct request_sense *sense = cgc->sense; + unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE]; SDev = cd->device; - if (!sense) { - sense = kmalloc(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL); - if (!sense) { - err = -ENOMEM; - goto out; - } - } - retry: if (!scsi_block_when_processing_errors(SDev)) { err = -ENODEV; goto out; } - memset(sense, 0, sizeof(*sense)); + memset(sense_buffer, 0, sizeof(sense_buffer)); result = scsi_execute(SDev, cgc->cmd, cgc->data_direction, - cgc->buffer, cgc->buflen, (char *)sense, + cgc->buffer, cgc->buflen, sense_buffer, cgc->timeout, IOCTL_RETRIES, 0, NULL); - scsi_normalize_sense((char *)sense, sizeof(*sense), &sshdr); + scsi_normalize_sense(sense_buffer, sizeof(sense_buffer), &sshdr); + + if (cgc->sense) + memcpy(cgc->sense, sense_buffer, sizeof(*cgc->sense)); /* Minimal error checking. Ignore cases we know about, and report the rest. */ if (driver_byte(result) != 0) { @@ -261,8 +256,6 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) /* Wake up a process waiting for device */ out: - if (!cgc->sense) - kfree(sense); cgc->stat = err; return err; } diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 2e522951b619..088a68ab4246 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4821,9 +4821,8 @@ static int sgl_map_user_pages(struct st_buffer *STbp, current->mm, uaddr, nr_pages, - rw == READ, - 0, /* don't force */ - pages); + pages, + rw == READ ? FOLL_WRITE : 0); /* don't force */ /* Errors and no page mapped should return here */ if (res < nr_pages) diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/staging/rdma/hfi1/user_pages.c index 9071afbd7bf4..b776b74d3d14 100644 --- a/drivers/staging/rdma/hfi1/user_pages.c +++ b/drivers/staging/rdma/hfi1/user_pages.c @@ -85,7 +85,7 @@ static int __hfi1_get_user_pages(unsigned long start_page, size_t num_pages, for (got = 0; got < num_pages; got += ret) { ret = get_user_pages(current, current->mm, start_page + got * PAGE_SIZE, - num_pages - got, 1, 1, + num_pages - got, FOLL_WRITE | FOLL_FORCE, p + got, NULL); if (ret < 0) goto bail_release; diff --git a/drivers/staging/rdma/ipath/ipath_user_pages.c b/drivers/staging/rdma/ipath/ipath_user_pages.c index d29b4daf61f8..f69ec728e0de 100644 --- a/drivers/staging/rdma/ipath/ipath_user_pages.c +++ b/drivers/staging/rdma/ipath/ipath_user_pages.c @@ -72,7 +72,7 @@ static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages, for (got = 0; got < num_pages; got += ret) { ret = get_user_pages(current, current->mm, start_page + got * PAGE_SIZE, - num_pages - got, 1, 1, + num_pages - got, FOLL_WRITE | FOLL_FORCE, p + got, NULL); if (ret < 0) goto bail_release; diff --git a/drivers/staging/speakup/kobjects.c b/drivers/staging/speakup/kobjects.c index 06ef26872462..52aed7cfeb24 100644 --- a/drivers/staging/speakup/kobjects.c +++ b/drivers/staging/speakup/kobjects.c @@ -387,7 +387,7 @@ static ssize_t synth_store(struct kobject *kobj, struct kobj_attribute *attr, len = strlen(buf); if (len < 2 || len > 9) return -EINVAL; - strncpy(new_synth_name, buf, len); + memcpy(new_synth_name, buf, len); if (new_synth_name[len - 1] == '\n') len--; new_synth_name[len] = '\0'; @@ -514,7 +514,7 @@ static ssize_t punc_store(struct kobject *kobj, struct kobj_attribute *attr, return -EINVAL; } - strncpy(punc_buf, buf, x); + memcpy(punc_buf, buf, x); while (x && punc_buf[x - 1] == '\n') x--; diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index 9b7d39484ed3..d1ed92acafa3 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2037,6 +2037,7 @@ static inline int machine_without_vbus_sense(void) { return machine_is_omap_innovator() || machine_is_omap_osk() + || machine_is_omap_palmte() || machine_is_sx1() /* No known omap7xx boards with vbus sense */ || cpu_is_omap7xx(); @@ -2045,7 +2046,7 @@ static inline int machine_without_vbus_sense(void) static int omap_udc_start(struct usb_gadget *g, struct usb_gadget_driver *driver) { - int status = -ENODEV; + int status; struct omap_ep *ep; unsigned long flags; @@ -2083,6 +2084,7 @@ static int omap_udc_start(struct usb_gadget *g, goto done; } } else { + status = 0; if (can_pullup(udc)) pullup_enable(udc); else @@ -2612,9 +2614,22 @@ omap_ep_setup(char *name, u8 addr, u8 type, static void omap_udc_release(struct device *dev) { - complete(udc->done); + pullup_disable(udc); + if (!IS_ERR_OR_NULL(udc->transceiver)) { + usb_put_phy(udc->transceiver); + udc->transceiver = NULL; + } + omap_writew(0, UDC_SYSCON1); + remove_proc_file(); + if (udc->dc_clk) { + if (udc->clk_requested) + omap_udc_enable_clock(0); + clk_put(udc->hhc_clk); + clk_put(udc->dc_clk); + } + if (udc->done) + complete(udc->done); kfree(udc); - udc = NULL; } static int @@ -2886,8 +2901,8 @@ bad_on_1710: udc->clr_halt = UDC_RESET_EP; /* USB general purpose IRQ: ep0, state changes, dma, etc */ - status = request_irq(pdev->resource[1].start, omap_udc_irq, - 0, driver_name, udc); + status = devm_request_irq(&pdev->dev, pdev->resource[1].start, + omap_udc_irq, 0, driver_name, udc); if (status != 0) { ERR("can't get irq %d, err %d\n", (int) pdev->resource[1].start, status); @@ -2895,20 +2910,20 @@ bad_on_1710: } /* USB "non-iso" IRQ (PIO for all but ep0) */ - status = request_irq(pdev->resource[2].start, omap_udc_pio_irq, - 0, "omap_udc pio", udc); + status = devm_request_irq(&pdev->dev, pdev->resource[2].start, + omap_udc_pio_irq, 0, "omap_udc pio", udc); if (status != 0) { ERR("can't get irq %d, err %d\n", (int) pdev->resource[2].start, status); - goto cleanup2; + goto cleanup1; } #ifdef USE_ISO - status = request_irq(pdev->resource[3].start, omap_udc_iso_irq, - 0, "omap_udc iso", udc); + status = devm_request_irq(&pdev->dev, pdev->resource[3].start, + omap_udc_iso_irq, 0, "omap_udc iso", udc); if (status != 0) { ERR("can't get irq %d, err %d\n", (int) pdev->resource[3].start, status); - goto cleanup3; + goto cleanup1; } #endif if (cpu_is_omap16xx() || cpu_is_omap7xx()) { @@ -2919,23 +2934,8 @@ bad_on_1710: } create_proc_file(); - status = usb_add_gadget_udc_release(&pdev->dev, &udc->gadget, - omap_udc_release); - if (status) - goto cleanup4; - - return 0; - -cleanup4: - remove_proc_file(); - -#ifdef USE_ISO -cleanup3: - free_irq(pdev->resource[2].start, udc); -#endif - -cleanup2: - free_irq(pdev->resource[1].start, udc); + return usb_add_gadget_udc_release(&pdev->dev, &udc->gadget, + omap_udc_release); cleanup1: kfree(udc); @@ -2962,42 +2962,15 @@ static int omap_udc_remove(struct platform_device *pdev) { DECLARE_COMPLETION_ONSTACK(done); - if (!udc) - return -ENODEV; - - usb_del_gadget_udc(&udc->gadget); - if (udc->driver) - return -EBUSY; - udc->done = &done; - pullup_disable(udc); - if (!IS_ERR_OR_NULL(udc->transceiver)) { - usb_put_phy(udc->transceiver); - udc->transceiver = NULL; - } - omap_writew(0, UDC_SYSCON1); - - remove_proc_file(); - -#ifdef USE_ISO - free_irq(pdev->resource[3].start, udc); -#endif - free_irq(pdev->resource[2].start, udc); - free_irq(pdev->resource[1].start, udc); + usb_del_gadget_udc(&udc->gadget); - if (udc->dc_clk) { - if (udc->clk_requested) - omap_udc_enable_clock(0); - clk_put(udc->hhc_clk); - clk_put(udc->dc_clk); - } + wait_for_completion(&done); release_mem_region(pdev->resource[0].start, pdev->resource[0].end - pdev->resource[0].start + 1); - wait_for_completion(&done); - return 0; } diff --git a/drivers/video/fbdev/matrox/matroxfb_Ti3026.c b/drivers/video/fbdev/matrox/matroxfb_Ti3026.c index 195ad7cac1ba..68fa037d8cbc 100644 --- a/drivers/video/fbdev/matrox/matroxfb_Ti3026.c +++ b/drivers/video/fbdev/matrox/matroxfb_Ti3026.c @@ -372,7 +372,7 @@ static int Ti3026_init(struct matrox_fb_info *minfo, struct my_timming *m) DBG(__func__) - memcpy(hw->DACreg, MGADACbpp32, sizeof(hw->DACreg)); + memcpy(hw->DACreg, MGADACbpp32, sizeof(MGADACbpp32)); switch (minfo->fbcon.var.bits_per_pixel) { case 4: hw->DACreg[POS3026_XLATCHCTRL] = TVP3026_XLATCHCTRL_16_1; /* or _8_1, they are same */ hw->DACreg[POS3026_XTRUECOLORCTRL] = TVP3026_XTRUECOLORCTRL_PSEUDOCOLOR; diff --git a/drivers/video/fbdev/pvr2fb.c b/drivers/video/fbdev/pvr2fb.c index 0e24eb9c219c..750a384bf191 100644 --- a/drivers/video/fbdev/pvr2fb.c +++ b/drivers/video/fbdev/pvr2fb.c @@ -687,7 +687,7 @@ static ssize_t pvr2fb_write(struct fb_info *info, const char *buf, return -ENOMEM; ret = get_user_pages_unlocked(current, current->mm, (unsigned long)buf, - nr_pages, WRITE, 0, pages); + nr_pages, pages, FOLL_WRITE); if (ret < nr_pages) { nr_pages = ret; diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c index 32c8fc5f7a5c..590a0f51a249 100644 --- a/drivers/virt/fsl_hypervisor.c +++ b/drivers/virt/fsl_hypervisor.c @@ -246,8 +246,8 @@ static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p) down_read(¤t->mm->mmap_sem); num_pinned = get_user_pages(current, current->mm, param.local_vaddr - lb_offset, num_pages, - (param.source == -1) ? READ : WRITE, - 0, pages, NULL); + (param.source == -1) ? 0 : FOLL_WRITE, + pages, NULL); up_read(¤t->mm->mmap_sem); if (num_pinned != num_pages) { diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index 5063c5e796b7..84a1fab0dd6b 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 83c73738165e..40d1ab957fb6 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3232,7 +3232,8 @@ static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m) kfree(m); } -static void tail_append_pending_moves(struct pending_dir_move *moves, +static void tail_append_pending_moves(struct send_ctx *sctx, + struct pending_dir_move *moves, struct list_head *stack) { if (list_empty(&moves->list)) { @@ -3243,6 +3244,10 @@ static void tail_append_pending_moves(struct pending_dir_move *moves, list_add_tail(&moves->list, stack); list_splice_tail(&list, stack); } + if (!RB_EMPTY_NODE(&moves->node)) { + rb_erase(&moves->node, &sctx->pending_dir_moves); + RB_CLEAR_NODE(&moves->node); + } } static int apply_children_dir_moves(struct send_ctx *sctx) @@ -3257,7 +3262,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx) return 0; INIT_LIST_HEAD(&stack); - tail_append_pending_moves(pm, &stack); + tail_append_pending_moves(sctx, pm, &stack); while (!list_empty(&stack)) { pm = list_first_entry(&stack, struct pending_dir_move, list); @@ -3268,7 +3273,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx) goto out; pm = get_pending_dir_moves(sctx, parent_ino); if (pm) - tail_append_pending_moves(pm, &stack); + tail_append_pending_moves(sctx, pm, &stack); } return 0; diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 5b68cf526887..c05ab2ec0fef 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -963,11 +963,8 @@ error: void cachefiles_uncache_page(struct fscache_object *_object, struct page *page) { struct cachefiles_object *object; - struct cachefiles_cache *cache; object = container_of(_object, struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); _enter("%p,{%lu}", object, page->index); diff --git a/fs/exec.c b/fs/exec.c index 910fc70c4542..3dad755b7048 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -191,6 +191,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, { struct page *page; int ret; + unsigned int gup_flags = FOLL_FORCE; #ifdef CONFIG_STACK_GROWSUP if (write) { @@ -199,8 +200,12 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, return NULL; } #endif - ret = get_user_pages(current, bprm->mm, pos, - 1, write, 1, &page, NULL); + + if (write) + gup_flags |= FOLL_WRITE; + + ret = get_user_pages(current, bprm->mm, pos, 1, gup_flags, + &page, NULL); if (ret <= 0) return NULL; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 714cd37a6ba3..6599c6124552 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -76,7 +76,7 @@ static bool dentry_connected(struct dentry *dentry) struct dentry *parent = dget_parent(dentry); dput(dentry); - if (IS_ROOT(dentry)) { + if (dentry == parent) { dput(parent); return false; } diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 7a182c87f378..ab1d7f35f6c2 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -715,6 +715,9 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob if (awaken) wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); + if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); + /* Prevent a race with our last child, which has to signal EV_CLEARED * before dropping our spinlock. diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 1ab19e660e69..1ff5774a5382 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -328,13 +328,14 @@ void hfs_bmap_free(struct hfs_bnode *node) nidx -= len * 8; i = node->next; - hfs_bnode_put(node); if (!i) { /* panic */; pr_crit("unable to free bnode %u. bmap not found!\n", node->this); + hfs_bnode_put(node); return; } + hfs_bnode_put(node); node = hfs_bnode_find(tree, i); if (IS_ERR(node)) return; diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 3345c7553edc..7adc8a327e03 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -453,14 +453,15 @@ void hfs_bmap_free(struct hfs_bnode *node) nidx -= len * 8; i = node->next; - hfs_bnode_put(node); if (!i) { /* panic */; pr_crit("unable to free bnode %u. " "bmap not found!\n", node->this); + hfs_bnode_put(node); return; } + hfs_bnode_put(node); node = hfs_bnode_find(tree, i); if (IS_ERR(node)) return; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a17da8b57fc6..ab34f613fa85 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -118,6 +118,16 @@ static void huge_pagevec_release(struct pagevec *pvec) pagevec_reinit(pvec); } +/* + * Mask used when checking the page offset value passed in via system + * calls. This value will be converted to a loff_t which is signed. + * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the + * value. The extra bit (- 1 in the shift value) is to take the sign + * bit into account. + */ +#define PGOFF_LOFFT_MAX \ + (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1))) + static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); @@ -136,17 +146,31 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; vma->vm_ops = &hugetlb_vm_ops; + /* + * page based offset in vm_pgoff could be sufficiently large to + * overflow a loff_t when converted to byte offset. This can + * only happen on architectures where sizeof(loff_t) == + * sizeof(unsigned long). So, only check in those instances. + */ + if (sizeof(unsigned long) == sizeof(loff_t)) { + if (vma->vm_pgoff & PGOFF_LOFFT_MAX) + return -EINVAL; + } + + /* must be huge page aligned */ if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) return -EINVAL; vma_len = (loff_t)(vma->vm_end - vma->vm_start); + len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); + /* check for overflow */ + if (len < vma_len) + return -EINVAL; mutex_lock(&inode->i_mutex); file_accessed(file); ret = -ENOMEM; - len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - if (hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), len >> huge_page_shift(h), vma, @@ -155,7 +179,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) ret = 0; if (vma->vm_flags & VM_WRITE && inode->i_size < len) - inode->i_size = len; + i_size_write(inode, len); out: mutex_unlock(&inode->i_mutex); diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 827fc9809bc2..3494e220b510 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -125,10 +125,10 @@ check_err: check_gen: if (handle->ih_generation != inode->i_generation) { - iput(inode); trace_ocfs2_get_dentry_generation((unsigned long long)blkno, handle->ih_generation, inode->i_generation); + iput(inode); result = ERR_PTR(-ESTALE); goto bail; } diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 124471d26a73..c1a83c58456e 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -156,18 +156,14 @@ out: } /* - * lock allocators, and reserving appropriate number of bits for - * meta blocks and data clusters. - * - * in some cases, we don't need to reserve clusters, just let data_ac - * be NULL. + * lock allocator, and reserve appropriate number of bits for + * meta blocks. */ -static int ocfs2_lock_allocators_move_extents(struct inode *inode, +static int ocfs2_lock_meta_allocator_move_extents(struct inode *inode, struct ocfs2_extent_tree *et, u32 clusters_to_move, u32 extents_to_split, struct ocfs2_alloc_context **meta_ac, - struct ocfs2_alloc_context **data_ac, int extra_blocks, int *credits) { @@ -192,13 +188,6 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode, goto out; } - if (data_ac) { - ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac); - if (ret) { - mlog_errno(ret); - goto out; - } - } *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el); @@ -260,10 +249,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, } } - ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1, - &context->meta_ac, - &context->data_ac, - extra_blocks, &credits); + ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, + *len, 1, + &context->meta_ac, + extra_blocks, &credits); if (ret) { mlog_errno(ret); goto out; @@ -286,6 +275,21 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, } } + /* + * Make sure ocfs2_reserve_cluster is called after + * __ocfs2_flush_truncate_log, otherwise, dead lock may happen. + * + * If ocfs2_reserve_cluster is called + * before __ocfs2_flush_truncate_log, dead lock on global bitmap + * may happen. + * + */ + ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac); + if (ret) { + mlog_errno(ret); + goto out_unlock_mutex; + } + handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -606,9 +610,10 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, } } - ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1, - &context->meta_ac, - NULL, extra_blocks, &credits); + ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, + len, 1, + &context->meta_ac, + extra_blocks, &credits); if (ret) { mlog_errno(ret); goto out; diff --git a/fs/proc/base.c b/fs/proc/base.c index 4beed301e224..bd8c26a409a7 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -254,7 +254,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, * Inherently racy -- command line shares address space * with code and data. */ - rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0); + rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON); if (rv <= 0) goto out_free_page; @@ -272,7 +272,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, int nr_read; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -307,7 +307,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -356,7 +356,7 @@ skip_argv: bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -868,6 +868,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, unsigned long addr = *ppos; ssize_t copied; char *page; + unsigned int flags; if (!mm) return 0; @@ -880,6 +881,11 @@ static ssize_t mem_rw(struct file *file, char __user *buf, if (!atomic_inc_not_zero(&mm->mm_users)) goto free; + /* Maybe we should limit FOLL_FORCE to actual ptrace users? */ + flags = FOLL_FORCE; + if (write) + flags |= FOLL_WRITE; + while (count > 0) { int this_len = min_t(int, count, PAGE_SIZE); @@ -888,7 +894,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, break; } - this_len = access_remote_vm(mm, addr, page, this_len, write); + this_len = access_remote_vm(mm, addr, page, this_len, flags); if (!this_len) { if (!copied) copied = -EIO; @@ -1000,8 +1006,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, max_len = min_t(size_t, PAGE_SIZE, count); this_len = min(max_len, this_len); - retval = access_remote_vm(mm, (env_start + src), - page, this_len, 0); + retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON); if (retval <= 0) { ret = retval; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 588461bb2dd4..e97e7d74e134 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -392,8 +392,8 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) } else { spin_lock_irqsave(&psinfo->buf_lock, flags); } - memcpy(psinfo->buf, s, c); - psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo); + psinfo->write_buf(PSTORE_TYPE_CONSOLE, 0, &id, 0, + s, 0, c, psinfo); spin_unlock_irqrestore(&psinfo->buf_lock, flags); s += c; c = e - s; diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 02fa1dcc5969..29f5b2e589a1 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -275,7 +275,7 @@ static int __sysv_write_inode(struct inode *inode, int wait) } } brelse(bh); - return 0; + return err; } int sysv_write_inode(struct inode *inode, struct writeback_control *wbc) diff --git a/include/linux/mm.h b/include/linux/mm.h index d4e8077fca96..251adf4d8a71 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1191,7 +1191,7 @@ static inline int fixup_user_fault(struct task_struct *tsk, extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, int write); + void *buf, int len, unsigned int gup_flags); long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, @@ -1199,19 +1199,17 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, struct vm_area_struct **vmas, int *nonblocking); long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas); long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - int *locked); + unsigned int gup_flags, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags); + struct page **pages, unsigned int gup_flags); long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages); + struct page **pages, unsigned int gup_flags); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); @@ -1229,7 +1227,7 @@ struct frame_vector { struct frame_vector *frame_vector_create(unsigned int nr_frames); void frame_vector_destroy(struct frame_vector *vec); int get_vaddr_frames(unsigned long start, unsigned int nr_pfns, - bool write, bool force, struct frame_vector *vec); + unsigned int gup_flags, struct frame_vector *vec); void put_vaddr_frames(struct frame_vector *vec); int frame_vector_to_pages(struct frame_vector *vec); void frame_vector_to_pfns(struct frame_vector *vec); @@ -2122,6 +2120,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_MLOCK 0x1000 /* lock present pages */ #define FOLL_COW 0x4000 /* internal GUP flag */ +#define FOLL_ANON 0x8000 /* don't do file mappings */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 907f3fd191ac..3e28a1a8d823 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -65,8 +65,8 @@ struct k_itimer { spinlock_t it_lock; clockid_t it_clock; /* which timer type */ timer_t it_id; /* timer id */ - int it_overrun; /* overrun on pending signal */ - int it_overrun_last; /* overrun on last delivered signal */ + s64 it_overrun; /* overrun on pending signal */ + s64 it_overrun_last; /* overrun on last delivered signal */ int it_requeue_pending; /* waiting to requeue this timer */ #define REQUEUE_PENDING 1 int it_sigev_notify; /* notify word of sigevent struct */ diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 8b683841e574..f6017ddc4ded 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -448,6 +448,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) { + unsigned int hh_alen = 0; unsigned int seq; int hh_len; @@ -455,16 +456,33 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb seq = read_seqbegin(&hh->hh_lock); hh_len = hh->hh_len; if (likely(hh_len <= HH_DATA_MOD)) { - /* this is inlined by gcc */ - memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); + hh_alen = HH_DATA_MOD; + + /* skb_push() would proceed silently if we have room for + * the unaligned size but not for the aligned size: + * check headroom explicitly. + */ + if (likely(skb_headroom(skb) >= HH_DATA_MOD)) { + /* this is inlined by gcc */ + memcpy(skb->data - HH_DATA_MOD, hh->hh_data, + HH_DATA_MOD); + } } else { - int hh_alen = HH_DATA_ALIGN(hh_len); + hh_alen = HH_DATA_ALIGN(hh_len); - memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); + if (likely(skb_headroom(skb) >= hh_alen)) { + memcpy(skb->data - hh_alen, hh->hh_data, + hh_alen); + } } } while (read_seqretry(&hh->hh_lock, seq)); - skb_push(skb, hh_len); + if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + __skb_push(skb, hh_len); return dev_queue_xmit(skb); } diff --git a/include/sound/pcm.h b/include/sound/pcm.h index b0be09279943..ffc161906d36 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -100,7 +100,7 @@ struct snd_pcm_ops { #endif #define SNDRV_PCM_IOCTL1_RESET 0 -#define SNDRV_PCM_IOCTL1_INFO 1 +/* 1 is absent slot. */ #define SNDRV_PCM_IOCTL1_CHANNEL_INFO 2 #define SNDRV_PCM_IOCTL1_GSTATE 3 #define SNDRV_PCM_IOCTL1_FIFO_SIZE 4 diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 35dfa9e9d69e..c43ca9857479 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -191,6 +191,7 @@ struct bpf_insn_aux_data { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ }; + int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ }; @@ -569,10 +570,11 @@ static bool is_spillable_regtype(enum bpf_reg_type type) /* check_stack_read/write functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ -static int check_stack_write(struct verifier_state *state, int off, int size, - int value_regno) +static int check_stack_write(struct verifier_env *env, + struct verifier_state *state, int off, + int size, int value_regno, int insn_idx) { - int i; + int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, * so it's aligned access and [off, off + size) are within stack limits */ @@ -587,15 +589,37 @@ static int check_stack_write(struct verifier_state *state, int off, int size, } /* save register state */ - state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = - state->regs[value_regno]; - - for (i = 0; i < BPF_REG_SIZE; i++) + state->spilled_regs[spi] = state->regs[value_regno]; + + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC && + !env->allow_ptr_leaks) { + int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; + int soff = (-spi - 1) * BPF_REG_SIZE; + + /* detected reuse of integer stack slot with a pointer + * which means either llvm is reusing stack slot or + * an attacker is trying to exploit CVE-2018-3639 + * (speculative store bypass) + * Have to sanitize that slot with preemptive + * store of zero. + */ + if (*poff && *poff != soff) { + /* disallow programs where single insn stores + * into two different stack slots, since verifier + * cannot sanitize them + */ + verbose("insn %d cannot access two stack slots fp%d and fp%d", + insn_idx, *poff, soff); + return -EINVAL; + } + *poff = soff; + } state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; + } } else { /* regular write of data into stack */ - state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = - (struct reg_state) {}; + state->spilled_regs[spi] = (struct reg_state) {}; for (i = 0; i < size; i++) state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; @@ -696,7 +720,7 @@ static bool is_ctx_reg(struct verifier_env *env, int regno) * if t==write && value_regno==-1, some unknown value is stored into memory * if t==read && value_regno==-1, don't care what we read from memory */ -static int check_mem_access(struct verifier_env *env, u32 regno, int off, +static int check_mem_access(struct verifier_env *env, int insn_idx, u32 regno, int off, int bpf_size, enum bpf_access_type t, int value_regno) { @@ -748,7 +772,8 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, verbose("attempt to corrupt spilled pointer on stack\n"); return -EACCES; } - err = check_stack_write(state, off, size, value_regno); + err = check_stack_write(env, state, off, size, + value_regno, insn_idx); } else { err = check_stack_read(state, off, size, value_regno); } @@ -760,7 +785,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, return err; } -static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) +static int check_xadd(struct verifier_env *env, int insn_idx, struct bpf_insn *insn) { struct reg_state *regs = env->cur_state.regs; int err; @@ -793,13 +818,13 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) } /* check whether atomic_add can read the memory */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1); if (err) return err; /* check whether atomic_add can write into the same memory */ - return check_mem_access(env, insn->dst_reg, insn->off, + return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1); } @@ -1838,13 +1863,14 @@ static int do_check(struct verifier_env *env) /* check that memory (src_reg + off) is readable, * the state of dst_reg will be updated by this func */ - err = check_mem_access(env, insn->src_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, insn->dst_reg); if (err) return err; - if (BPF_SIZE(insn->code) != BPF_W) { + if (BPF_SIZE(insn->code) != BPF_W && + BPF_SIZE(insn->code) != BPF_DW) { insn_idx++; continue; } @@ -1876,7 +1902,7 @@ static int do_check(struct verifier_env *env) enum bpf_reg_type *prev_dst_type, dst_reg_type; if (BPF_MODE(insn->code) == BPF_XADD) { - err = check_xadd(env, insn); + err = check_xadd(env, insn_idx, insn); if (err) return err; insn_idx++; @@ -1895,7 +1921,7 @@ static int do_check(struct verifier_env *env) dst_reg_type = regs[insn->dst_reg].type; /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg); if (err) @@ -1930,7 +1956,7 @@ static int do_check(struct verifier_env *env) } /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1); if (err) @@ -2220,13 +2246,43 @@ static int convert_ctx_accesses(struct verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { u32 cnt; - if (insn->code == (BPF_LDX | BPF_MEM | BPF_W)) + if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) || + insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) type = BPF_READ; - else if (insn->code == (BPF_STX | BPF_MEM | BPF_W)) + else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) || + insn->code == (BPF_STX | BPF_MEM | BPF_DW)) type = BPF_WRITE; else continue; + if (type == BPF_WRITE && + env->insn_aux_data[i + delta].sanitize_stack_off) { + struct bpf_insn patch[] = { + /* Sanitize suspicious stack slot with zero. + * There are no memory dependencies for this store, + * since it's only using frame pointer and immediate + * constant of zero + */ + BPF_ST_MEM(BPF_DW, BPF_REG_FP, + env->insn_aux_data[i + delta].sanitize_stack_off, + 0), + /* the original STX instruction will immediately + * overwrite the same stack slot with appropriate value + */ + *insn, + }; + + cnt = ARRAY_SIZE(patch); + new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) continue; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7108097fa2f2..aad43c88a668 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -299,7 +299,7 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, retry: /* Read the page with vaddr into memory */ - ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma); + ret = get_user_pages(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page, &vma); if (ret <= 0) return ret; @@ -1700,7 +1700,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) if (likely(result == 0)) goto out; - result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); + result = get_user_pages(NULL, mm, vaddr, 1, FOLL_FORCE, &page, NULL); if (result < 0) return result; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 80016b329d94..8fc68e60c795 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -103,7 +103,7 @@ static void bump_cpu_timer(struct k_itimer *timer, continue; timer->it.cpu.expires += incr; - timer->it_overrun += 1 << i; + timer->it_overrun += 1LL << i; delta -= incr; } } diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index fc7c37ad90a0..0e6ed2e7d066 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -355,6 +355,17 @@ static __init int init_posix_timers(void) __initcall(init_posix_timers); +/* + * The siginfo si_overrun field and the return value of timer_getoverrun(2) + * are of type int. Clamp the overrun value to INT_MAX + */ +static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval) +{ + s64 sum = timr->it_overrun_last + (s64)baseval; + + return sum > (s64)INT_MAX ? INT_MAX : (int)sum; +} + static void schedule_next_timer(struct k_itimer *timr) { struct hrtimer *timer = &timr->it.real.timer; @@ -362,12 +373,11 @@ static void schedule_next_timer(struct k_itimer *timr) if (timr->it.real.interval.tv64 == 0) return; - timr->it_overrun += (unsigned int) hrtimer_forward(timer, - timer->base->get_time(), - timr->it.real.interval); + timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(), + timr->it.real.interval); timr->it_overrun_last = timr->it_overrun; - timr->it_overrun = -1; + timr->it_overrun = -1LL; ++timr->it_requeue_pending; hrtimer_restart(timer); } @@ -396,7 +406,7 @@ void do_schedule_next_timer(struct siginfo *info) else schedule_next_timer(timr); - info->si_overrun += timr->it_overrun_last; + info->si_overrun = timer_overrun_to_int(timr, info->si_overrun); } if (timr) @@ -491,8 +501,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) now = ktime_add(now, kj); } #endif - timr->it_overrun += (unsigned int) - hrtimer_forward(timer, now, + timr->it_overrun += hrtimer_forward(timer, now, timr->it.real.interval); ret = HRTIMER_RESTART; ++timr->it_requeue_pending; @@ -633,7 +642,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, it_id_set = IT_ID_SET; new_timer->it_id = (timer_t) new_timer_id; new_timer->it_clock = which_clock; - new_timer->it_overrun = -1; + new_timer->it_overrun = -1LL; if (timer_event_spec) { if (copy_from_user(&event, timer_event_spec, sizeof (event))) { @@ -762,7 +771,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) */ if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING || timr->it_sigev_notify == SIGEV_NONE)) - timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); + timr->it_overrun += hrtimer_forward(timer, now, iv); remaining = __hrtimer_expires_remaining_adjusted(timer, now); /* Return 0 only, when the timer is expired and not pending */ @@ -824,7 +833,7 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) if (!timr) return -EINVAL; - overrun = timr->it_overrun_last; + overrun = timer_overrun_to_int(timr, 0); unlock_timer(timr, flags); return overrun; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 4228fd3682c3..3dd40c736067 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -119,11 +119,13 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) i++; } else if (fmt[i] == 'p' || fmt[i] == 's') { mod[fmt_cnt]++; - i++; - if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0) + /* disallow any further format extensions */ + if (fmt[i + 1] != 0 && + !isspace(fmt[i + 1]) && + !ispunct(fmt[i + 1])) return -EINVAL; fmt_cnt++; - if (fmt[i - 1] == 's') { + if (fmt[i] == 's') { if (str_seen) /* allow only one '%s' per fmt string */ return -EINVAL; diff --git a/lib/debugobjects.c b/lib/debugobjects.c index a26328ec39f1..bb37541cd441 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -1088,7 +1088,8 @@ void __init debug_objects_mem_init(void) obj_cache = kmem_cache_create("debug_objects_cache", sizeof (struct debug_obj), 0, - SLAB_DEBUG_OBJECTS, NULL); + SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, + NULL); if (!obj_cache || debug_objects_replace_static_objects()) { debug_objects_enabled = 0; diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 771234d050c7..6bc452b33b76 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -17,6 +17,8 @@ * 08/12/11 beckyb Add highmem support */ +#define pr_fmt(fmt) "software IO TLB: " fmt + #include #include #include @@ -143,20 +145,16 @@ static bool no_iotlb_memory; void swiotlb_print_info(void) { unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; - unsigned char *vstart, *vend; if (no_iotlb_memory) { - pr_warn("software IO TLB: No low mem\n"); + pr_warn("No low mem\n"); return; } - vstart = phys_to_virt(io_tlb_start); - vend = phys_to_virt(io_tlb_end); - - printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n", + pr_info("mapped [mem %#010llx-%#010llx] (%luMB)\n", (unsigned long long)io_tlb_start, (unsigned long long)io_tlb_end, - bytes >> 20, vstart, vend - 1); + bytes >> 20); } int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) @@ -230,7 +228,7 @@ swiotlb_init(int verbose) if (io_tlb_start) memblock_free_early(io_tlb_start, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - pr_warn("Cannot allocate SWIOTLB buffer"); + pr_warn("Cannot allocate buffer"); no_iotlb_memory = true; } @@ -272,8 +270,8 @@ swiotlb_late_init_with_default_size(size_t default_size) return -ENOMEM; } if (order != get_order(bytes)) { - printk(KERN_WARNING "Warning: only able to allocate %ld MB " - "for software IO TLB\n", (PAGE_SIZE << order) >> 20); + pr_warn("only able to allocate %ld MB\n", + (PAGE_SIZE << order) >> 20); io_tlb_nslabs = SLABS_PER_PAGE << order; } rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); @@ -680,7 +678,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, return ret; err_warn: - pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n", + pr_warn("coherent allocation failed for device %s size=%zu\n", dev_name(hwdev), size); dump_stack(); diff --git a/mm/frame_vector.c b/mm/frame_vector.c index 7cf2b7163222..c1e7926a41c4 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -11,10 +11,7 @@ * get_vaddr_frames() - map virtual addresses to pfns * @start: starting user address * @nr_frames: number of pages / pfns from start to map - * @write: whether pages will be written to by the caller - * @force: whether to force write access even if user mapping is - * readonly. See description of the same argument of - get_user_pages(). + * @gup_flags: flags modifying lookup behaviour * @vec: structure which receives pages / pfns of the addresses mapped. * It should have space for at least nr_frames entries. * @@ -34,7 +31,7 @@ * This function takes care of grabbing mmap_sem as necessary. */ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, - bool write, bool force, struct frame_vector *vec) + unsigned int gup_flags, struct frame_vector *vec) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -59,7 +56,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, vec->got_ref = true; vec->is_pfns = false; ret = get_user_pages_locked(current, mm, start, nr_frames, - write, force, (struct page **)(vec->ptrs), &locked); + gup_flags, (struct page **)(vec->ptrs), &locked); goto out; } diff --git a/mm/gup.c b/mm/gup.c index 018144c4b9ec..2cd3b31e3666 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -368,6 +368,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if (vm_flags & (VM_IO | VM_PFNMAP)) return -EFAULT; + if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma)) + return -EFAULT; + if (gup_flags & FOLL_WRITE) { if (!(vm_flags & VM_WRITE)) { if (!(gup_flags & FOLL_FORCE)) @@ -627,7 +630,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, struct vm_area_struct **vmas, int *locked, bool notify_drop, @@ -645,10 +647,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, if (pages) flags |= FOLL_GET; - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; pages_done = 0; lock_dropped = false; @@ -742,11 +740,12 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, */ long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, int *locked) { - return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, NULL, locked, true, FOLL_TOUCH); + return __get_user_pages_locked(tsk, mm, start, nr_pages, + pages, NULL, locked, true, + gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages_locked); @@ -762,14 +761,14 @@ EXPORT_SYMBOL(get_user_pages_locked); */ __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags) + struct page **pages, unsigned int gup_flags) { long ret; int locked = 1; + down_read(&mm->mmap_sem); - ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, NULL, &locked, false, gup_flags); + ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL, + &locked, false, gup_flags); if (locked) up_read(&mm->mmap_sem); return ret; @@ -795,10 +794,10 @@ EXPORT_SYMBOL(__get_user_pages_unlocked); */ long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages) + struct page **pages, unsigned int gup_flags) { - return __get_user_pages_unlocked(tsk, mm, start, nr_pages, write, - force, pages, FOLL_TOUCH); + return __get_user_pages_unlocked(tsk, mm, start, nr_pages, + pages, gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages_unlocked); @@ -858,11 +857,13 @@ EXPORT_SYMBOL(get_user_pages_unlocked); * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault. */ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, int write, - int force, struct page **pages, struct vm_area_struct **vmas) + unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas) { - return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, vmas, NULL, false, FOLL_TOUCH); + return __get_user_pages_locked(tsk, mm, start, nr_pages, + pages, vmas, NULL, false, + gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages); @@ -1411,7 +1412,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, pages += nr; ret = get_user_pages_unlocked(current, mm, start, - nr_pages - nr, write, 0, pages); + nr_pages - nr, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6f99a0f906bb..f1a45f5077fe 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4053,6 +4053,14 @@ int hugetlb_reserve_pages(struct inode *inode, struct resv_map *resv_map; long gbl_reserve; + /* This should never happen */ + if (from > to) { +#ifdef CONFIG_DEBUG_VM + WARN(1, "%s called with a negative range\n", __func__); +#endif + return -EINVAL; + } + /* * Only apply hugepage reservation if asked. At fault time, an * attempt will be made for VM_NORESERVE to allocate a page @@ -4142,7 +4150,9 @@ int hugetlb_reserve_pages(struct inode *inode, return 0; out_err: if (!vma || vma->vm_flags & VM_MAYSHARE) - region_abort(resv_map, from, to); + /* Don't call region_abort if region_chg failed */ + if (chg >= 0) + region_abort(resv_map, from, to); if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) kref_put(&resv_map->refs, resv_map_release); return ret; diff --git a/mm/memory.c b/mm/memory.c index 5aee9ec8b8c6..fa752df6dc85 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3711,10 +3711,11 @@ EXPORT_SYMBOL_GPL(generic_access_phys); * given task for page fault accounting. */ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, void *buf, int len, int write) + unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; void *old_buf = buf; + int write = gup_flags & FOLL_WRITE; down_read(&mm->mmap_sem); /* ignore errors, just check how much was successfully transferred */ @@ -3724,7 +3725,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, struct page *page = NULL; ret = get_user_pages(tsk, mm, addr, 1, - write, 1, &page, &vma); + gup_flags, &page, &vma); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT break; @@ -3776,14 +3777,14 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, * @addr: start address to access * @buf: source or destination buffer * @len: number of bytes to transfer - * @write: whether the access is a write + * @gup_flags: flags modifying lookup behaviour * * The caller must hold a reference on @mm. */ int access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, int write) + void *buf, int len, unsigned int gup_flags) { - return __access_remote_vm(NULL, mm, addr, buf, len, write); + return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); } /* @@ -3796,12 +3797,17 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, { struct mm_struct *mm; int ret; + unsigned int flags = FOLL_FORCE; mm = get_task_mm(tsk); if (!mm) return 0; - ret = __access_remote_vm(tsk, mm, addr, buf, len, write); + if (write) + flags |= FOLL_WRITE; + + ret = __access_remote_vm(tsk, mm, addr, buf, len, flags); + mmput(mm); return ret; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index be9840bf11d1..44134ba6fb53 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -818,7 +818,7 @@ static int lookup_node(struct mm_struct *mm, unsigned long addr) struct page *p; int err; - err = get_user_pages(current, mm, addr & PAGE_MASK, 1, 0, 0, &p, NULL); + err = get_user_pages(current, mm, addr & PAGE_MASK, 1, 0, &p, NULL); if (err >= 0) { err = page_to_nid(p); put_page(p); diff --git a/mm/nommu.c b/mm/nommu.c index 92be862c859b..2360546db065 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -184,40 +184,32 @@ finish_or_fault: */ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) { - int flags = 0; - - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; - - return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas, - NULL); + return __get_user_pages(tsk, mm, start, nr_pages, + gup_flags, pages, vmas, NULL); } EXPORT_SYMBOL(get_user_pages); long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, int *locked) { - return get_user_pages(tsk, mm, start, nr_pages, write, force, + return get_user_pages(tsk, mm, start, nr_pages, gup_flags, pages, NULL); } EXPORT_SYMBOL(get_user_pages_locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags) + struct page **pages, unsigned int gup_flags) { long ret; down_read(&mm->mmap_sem); - ret = get_user_pages(tsk, mm, start, nr_pages, write, force, - pages, NULL); + ret = __get_user_pages(tsk, mm, start, nr_pages, gup_flags, pages, + NULL, NULL); up_read(&mm->mmap_sem); return ret; } @@ -225,10 +217,10 @@ EXPORT_SYMBOL(__get_user_pages_unlocked); long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages) + struct page **pages, unsigned int gup_flags) { - return __get_user_pages_unlocked(tsk, mm, start, nr_pages, write, - force, pages, 0); + return __get_user_pages_unlocked(tsk, mm, start, nr_pages, + pages, gup_flags); } EXPORT_SYMBOL(get_user_pages_unlocked); @@ -1937,9 +1929,10 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf) EXPORT_SYMBOL(filemap_map_pages); static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, void *buf, int len, int write) + unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; + int write = gup_flags & FOLL_WRITE; down_read(&mm->mmap_sem); @@ -1974,14 +1967,14 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, * @addr: start address to access * @buf: source or destination buffer * @len: number of bytes to transfer - * @write: whether the access is a write + * @gup_flags: flags modifying lookup behaviour * * The caller must hold a reference on @mm. */ int access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, int write) + void *buf, int len, unsigned int gup_flags) { - return __access_remote_vm(NULL, mm, addr, buf, len, write); + return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); } /* @@ -1999,7 +1992,8 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in if (!mm) return 0; - len = __access_remote_vm(tsk, mm, addr, buf, len, write); + len = __access_remote_vm(tsk, mm, addr, buf, len, + write ? FOLL_WRITE : 0); mmput(mm); return len; diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 5d453e58ddbf..1b5a6104c5fc 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -88,19 +88,23 @@ static int process_vm_rw_single_vec(unsigned long addr, ssize_t rc = 0; unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES / sizeof(struct pages *); + unsigned int flags = 0; /* Work out address and page range required */ if (len == 0) return 0; nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1; + if (vm_write) + flags |= FOLL_WRITE; + while (!rc && nr_pages && iov_iter_count(iter)) { int pages = min(nr_pages, max_pages_per_loop); size_t bytes; /* Get the pages we're interested in */ pages = get_user_pages_unlocked(task, mm, pa, pages, - vm_write, 0, process_pages); + process_pages, flags); if (pages <= 0) return -EFAULT; diff --git a/mm/util.c b/mm/util.c index 5fae5b9c2885..db39235970c6 100644 --- a/mm/util.c +++ b/mm/util.c @@ -278,7 +278,7 @@ int __weak get_user_pages_fast(unsigned long start, { struct mm_struct *mm = current->mm; return get_user_pages_unlocked(current, mm, start, nr_pages, - write, 0, pages); + pages, write ? FOLL_WRITE : 0); } EXPORT_SYMBOL_GPL(get_user_pages_fast); diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index d4f5f220a8e5..28453d698d86 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -26,7 +26,7 @@ struct page **ceph_get_direct_page_vector(const void __user *data, while (got < num_pages) { rc = get_user_pages_unlocked(current, current->mm, (unsigned long)data + ((unsigned long)got * PAGE_SIZE), - num_pages - got, write_page, 0, pages + got); + num_pages - got, pages + got, write_page ? FOLL_WRITE : 0); if (rc < 0) break; BUG_ON(rc == 0); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d2a46ffe6382..d52b633164c9 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2931,6 +2931,9 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb, { int err; + if (dev->type != ARPHRD_ETHER) + return -EINVAL; + netif_addr_lock_bh(dev); err = nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->uc); if (err) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2d3c9df8d75c..b55b8954dae5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2263,14 +2263,18 @@ void tcp_send_loss_probe(struct sock *sk) skb = tcp_write_queue_tail(sk); } + if (unlikely(!skb)) { + WARN_ONCE(tp->packets_out, + "invalid inflight: %u state %u cwnd %u mss %d\n", + tp->packets_out, sk->sk_state, tp->snd_cwnd, mss); + inet_csk(sk)->icsk_pending = 0; + return; + } + /* At most one outstanding TLP retransmission. */ if (tp->tlp_high_seq) goto rearm_timer; - /* Retransmit last segment. */ - if (WARN_ON(!skb)) - goto rearm_timer; - if (skb_still_in_host_queue(sk, skb)) goto rearm_timer; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 530b62fd6b64..f8cca81d66f2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -169,37 +169,37 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); + unsigned int head_room; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; int seg_len = skb->len; int hlimit = -1; u32 mtu; - if (opt) { - unsigned int head_room; + head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); + if (opt) + head_room += opt->opt_nflen + opt->opt_flen; - /* First: exthdrs may take lots of space (~8K for now) - MAX_HEADER is not enough. - */ - head_room = opt->opt_nflen + opt->opt_flen; - seg_len += head_room; - head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); - - if (skb_headroom(skb) < head_room) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - if (!skb2) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); - return -ENOBUFS; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; + if (unlikely(skb_headroom(skb) < head_room)) { + struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); + if (!skb2) { + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return -ENOBUFS; } + if (skb->sk) + skb_set_owner_w(skb2, skb->sk); + consume_skb(skb); + skb = skb2; + } + + if (opt) { + seg_len += opt->opt_nflen + opt->opt_flen; + if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); + if (opt->opt_nflen) ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 743ff23885da..7acf1f2b8dfc 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -432,6 +432,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) int count = 1; int rc = NET_XMIT_SUCCESS; + /* Do not fool qdisc_drop_all() */ + skb->prev = NULL; + /* Random duplication */ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) ++count; diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 38651454ed08..6f388e77999c 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -874,7 +874,8 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, } /* Same with get_arg_page(bprm, pos, 0) in fs/exec.c */ #ifdef CONFIG_MMU - if (get_user_pages(current, bprm->mm, pos, 1, 0, 1, &page, NULL) <= 0) + if (get_user_pages(current, bprm->mm, pos, 1, + FOLL_FORCE, &page, NULL) <= 0) return false; #else page = bprm->page[pos / PAGE_SIZE]; diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 5bc7ddf8fc70..3ce2b8771762 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1849,8 +1849,6 @@ int snd_pcm_lib_ioctl(struct snd_pcm_substream *substream, unsigned int cmd, void *arg) { switch (cmd) { - case SNDRV_PCM_IOCTL1_INFO: - return 0; case SNDRV_PCM_IOCTL1_RESET: return snd_pcm_lib_ioctl_reset(substream, arg); case SNDRV_PCM_IOCTL1_CHANNEL_INFO: diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 0ad194002c0c..9b6dcdea4431 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -214,11 +214,7 @@ int snd_pcm_info(struct snd_pcm_substream *substream, struct snd_pcm_info *info) info->subdevices_avail = pstr->substream_count - pstr->substream_opened; strlcpy(info->subname, substream->name, sizeof(info->subname)); runtime = substream->runtime; - /* AB: FIXME!!! This is definitely nonsense */ - if (runtime) { - info->sync = runtime->sync; - substream->ops->ioctl(substream, SNDRV_PCM_IOCTL1_INFO, info); - } + return 0; } diff --git a/sound/soc/omap/omap-dmic.c b/sound/soc/omap/omap-dmic.c index 09db2aec12a3..776e809a8aab 100644 --- a/sound/soc/omap/omap-dmic.c +++ b/sound/soc/omap/omap-dmic.c @@ -48,6 +48,8 @@ struct omap_dmic { struct device *dev; void __iomem *io_base; struct clk *fclk; + struct pm_qos_request pm_qos_req; + int latency; int fclk_freq; int out_freq; int clk_div; @@ -124,6 +126,8 @@ static void omap_dmic_dai_shutdown(struct snd_pcm_substream *substream, mutex_lock(&dmic->mutex); + pm_qos_remove_request(&dmic->pm_qos_req); + if (!dai->active) dmic->active = 0; @@ -226,6 +230,8 @@ static int omap_dmic_dai_hw_params(struct snd_pcm_substream *substream, /* packet size is threshold * channels */ dma_data = snd_soc_dai_get_dma_data(dai, substream); dma_data->maxburst = dmic->threshold * channels; + dmic->latency = (OMAP_DMIC_THRES_MAX - dmic->threshold) * USEC_PER_SEC / + params_rate(params); return 0; } @@ -236,6 +242,9 @@ static int omap_dmic_dai_prepare(struct snd_pcm_substream *substream, struct omap_dmic *dmic = snd_soc_dai_get_drvdata(dai); u32 ctrl; + if (pm_qos_request_active(&dmic->pm_qos_req)) + pm_qos_update_request(&dmic->pm_qos_req, dmic->latency); + /* Configure uplink threshold */ omap_dmic_write(dmic, OMAP_DMIC_FIFO_CTRL_REG, dmic->threshold); diff --git a/sound/soc/omap/omap-mcpdm.c b/sound/soc/omap/omap-mcpdm.c index 8d0d45d330e7..8eb2d12b6a34 100644 --- a/sound/soc/omap/omap-mcpdm.c +++ b/sound/soc/omap/omap-mcpdm.c @@ -54,6 +54,8 @@ struct omap_mcpdm { unsigned long phys_base; void __iomem *io_base; int irq; + struct pm_qos_request pm_qos_req; + int latency[2]; struct mutex mutex; @@ -273,6 +275,9 @@ static void omap_mcpdm_dai_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai); + int tx = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK); + int stream1 = tx ? SNDRV_PCM_STREAM_PLAYBACK : SNDRV_PCM_STREAM_CAPTURE; + int stream2 = tx ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; mutex_lock(&mcpdm->mutex); @@ -285,6 +290,14 @@ static void omap_mcpdm_dai_shutdown(struct snd_pcm_substream *substream, } } + if (mcpdm->latency[stream2]) + pm_qos_update_request(&mcpdm->pm_qos_req, + mcpdm->latency[stream2]); + else if (mcpdm->latency[stream1]) + pm_qos_remove_request(&mcpdm->pm_qos_req); + + mcpdm->latency[stream1] = 0; + mutex_unlock(&mcpdm->mutex); } @@ -296,7 +309,7 @@ static int omap_mcpdm_dai_hw_params(struct snd_pcm_substream *substream, int stream = substream->stream; struct snd_dmaengine_dai_dma_data *dma_data; u32 threshold; - int channels; + int channels, latency; int link_mask = 0; channels = params_channels(params); @@ -336,14 +349,25 @@ static int omap_mcpdm_dai_hw_params(struct snd_pcm_substream *substream, dma_data->maxburst = (MCPDM_DN_THRES_MAX - threshold) * channels; + latency = threshold; } else { /* If playback is not running assume a stereo stream to come */ if (!mcpdm->config[!stream].link_mask) mcpdm->config[!stream].link_mask = (0x3 << 3); dma_data->maxburst = threshold * channels; + latency = (MCPDM_DN_THRES_MAX - threshold); } + /* + * The DMA must act to a DMA request within latency time (usec) to avoid + * under/overflow + */ + mcpdm->latency[stream] = latency * USEC_PER_SEC / params_rate(params); + + if (!mcpdm->latency[stream]) + mcpdm->latency[stream] = 10; + /* Check if we need to restart McPDM with this stream */ if (mcpdm->config[stream].link_mask && mcpdm->config[stream].link_mask != link_mask) @@ -358,6 +382,20 @@ static int omap_mcpdm_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai); + struct pm_qos_request *pm_qos_req = &mcpdm->pm_qos_req; + int tx = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK); + int stream1 = tx ? SNDRV_PCM_STREAM_PLAYBACK : SNDRV_PCM_STREAM_CAPTURE; + int stream2 = tx ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; + int latency = mcpdm->latency[stream2]; + + /* Prevent omap hardware from hitting off between FIFO fills */ + if (!latency || mcpdm->latency[stream1] < latency) + latency = mcpdm->latency[stream1]; + + if (pm_qos_request_active(pm_qos_req)) + pm_qos_update_request(pm_qos_req, latency); + else if (latency) + pm_qos_add_request(pm_qos_req, PM_QOS_CPU_DMA_LATENCY, latency); if (!omap_mcpdm_active(mcpdm)) { omap_mcpdm_start(mcpdm); @@ -419,6 +457,9 @@ static int omap_mcpdm_remove(struct snd_soc_dai *dai) free_irq(mcpdm->irq, (void *)mcpdm); pm_runtime_disable(mcpdm->dev); + if (pm_qos_request_active(&mcpdm->pm_qos_req)) + pm_qos_remove_request(&mcpdm->pm_qos_req); + return 0; } diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index fa6b74a304a7..b927f9c81d92 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1711,6 +1711,7 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card) } card->instantiated = 1; + dapm_mark_endpoints_dirty(card); snd_soc_dapm_sync(&card->dapm); mutex_unlock(&card->mutex); mutex_unlock(&client_mutex); diff --git a/tools/testing/selftests/networking/timestamping/.gitignore b/tools/testing/selftests/networking/timestamping/.gitignore new file mode 100644 index 000000000000..9e69e982fb38 --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/.gitignore @@ -0,0 +1,3 @@ +timestamping +txtimestamp +hwtstamp_config diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile new file mode 100644 index 000000000000..ccbb9edbbbb9 --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/Makefile @@ -0,0 +1,8 @@ +TEST_PROGS := hwtstamp_config timestamping txtimestamp + +all: $(TEST_PROGS) + +include ../../lib.mk + +clean: + rm -fr $(TEST_PROGS) diff --git a/tools/testing/selftests/networking/timestamping/hwtstamp_config.c b/tools/testing/selftests/networking/timestamping/hwtstamp_config.c new file mode 100644 index 000000000000..e8b685a7f15f --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/hwtstamp_config.c @@ -0,0 +1,134 @@ +/* Test program for SIOC{G,S}HWTSTAMP + * Copyright 2013 Solarflare Communications + * Author: Ben Hutchings + */ + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +static int +lookup_value(const char **names, int size, const char *name) +{ + int value; + + for (value = 0; value < size; value++) + if (names[value] && strcasecmp(names[value], name) == 0) + return value; + + return -1; +} + +static const char * +lookup_name(const char **names, int size, int value) +{ + return (value >= 0 && value < size) ? names[value] : NULL; +} + +static void list_names(FILE *f, const char **names, int size) +{ + int value; + + for (value = 0; value < size; value++) + if (names[value]) + fprintf(f, " %s\n", names[value]); +} + +static const char *tx_types[] = { +#define TX_TYPE(name) [HWTSTAMP_TX_ ## name] = #name + TX_TYPE(OFF), + TX_TYPE(ON), + TX_TYPE(ONESTEP_SYNC) +#undef TX_TYPE +}; +#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0]))) + +static const char *rx_filters[] = { +#define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name + RX_FILTER(NONE), + RX_FILTER(ALL), + RX_FILTER(SOME), + RX_FILTER(PTP_V1_L4_EVENT), + RX_FILTER(PTP_V1_L4_SYNC), + RX_FILTER(PTP_V1_L4_DELAY_REQ), + RX_FILTER(PTP_V2_L4_EVENT), + RX_FILTER(PTP_V2_L4_SYNC), + RX_FILTER(PTP_V2_L4_DELAY_REQ), + RX_FILTER(PTP_V2_L2_EVENT), + RX_FILTER(PTP_V2_L2_SYNC), + RX_FILTER(PTP_V2_L2_DELAY_REQ), + RX_FILTER(PTP_V2_EVENT), + RX_FILTER(PTP_V2_SYNC), + RX_FILTER(PTP_V2_DELAY_REQ), +#undef RX_FILTER +}; +#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0]))) + +static void usage(void) +{ + fputs("Usage: hwtstamp_config if_name [tx_type rx_filter]\n" + "tx_type is any of (case-insensitive):\n", + stderr); + list_names(stderr, tx_types, N_TX_TYPES); + fputs("rx_filter is any of (case-insensitive):\n", stderr); + list_names(stderr, rx_filters, N_RX_FILTERS); +} + +int main(int argc, char **argv) +{ + struct ifreq ifr; + struct hwtstamp_config config; + const char *name; + int sock; + + if ((argc != 2 && argc != 4) || (strlen(argv[1]) >= IFNAMSIZ)) { + usage(); + return 2; + } + + if (argc == 4) { + config.flags = 0; + config.tx_type = lookup_value(tx_types, N_TX_TYPES, argv[2]); + config.rx_filter = lookup_value(rx_filters, N_RX_FILTERS, argv[3]); + if (config.tx_type < 0 || config.rx_filter < 0) { + usage(); + return 2; + } + } + + sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock < 0) { + perror("socket"); + return 1; + } + + strcpy(ifr.ifr_name, argv[1]); + ifr.ifr_data = (caddr_t)&config; + + if (ioctl(sock, (argc == 2) ? SIOCGHWTSTAMP : SIOCSHWTSTAMP, &ifr)) { + perror("ioctl"); + return 1; + } + + printf("flags = %#x\n", config.flags); + name = lookup_name(tx_types, N_TX_TYPES, config.tx_type); + if (name) + printf("tx_type = %s\n", name); + else + printf("tx_type = %d\n", config.tx_type); + name = lookup_name(rx_filters, N_RX_FILTERS, config.rx_filter); + if (name) + printf("rx_filter = %s\n", name); + else + printf("rx_filter = %d\n", config.rx_filter); + + return 0; +} diff --git a/tools/testing/selftests/networking/timestamping/timestamping.c b/tools/testing/selftests/networking/timestamping/timestamping.c new file mode 100644 index 000000000000..5cdfd743447b --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/timestamping.c @@ -0,0 +1,528 @@ +/* + * This program demonstrates how the various time stamping features in + * the Linux kernel work. It emulates the behavior of a PTP + * implementation in stand-alone master mode by sending PTPv1 Sync + * multicasts once every second. It looks for similar packets, but + * beyond that doesn't actually implement PTP. + * + * Outgoing packets are time stamped with SO_TIMESTAMPING with or + * without hardware support. + * + * Incoming packets are time stamped with SO_TIMESTAMPING with or + * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and + * SO_TIMESTAMP[NS]. + * + * Copyright (C) 2009 Intel Corporation. + * Author: Patrick Ohly + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifndef SO_TIMESTAMPING +# define SO_TIMESTAMPING 37 +# define SCM_TIMESTAMPING SO_TIMESTAMPING +#endif + +#ifndef SO_TIMESTAMPNS +# define SO_TIMESTAMPNS 35 +#endif + +#ifndef SIOCGSTAMPNS +# define SIOCGSTAMPNS 0x8907 +#endif + +#ifndef SIOCSHWTSTAMP +# define SIOCSHWTSTAMP 0x89b0 +#endif + +static void usage(const char *error) +{ + if (error) + printf("invalid option: %s\n", error); + printf("timestamping interface option*\n\n" + "Options:\n" + " IP_MULTICAST_LOOP - looping outgoing multicasts\n" + " SO_TIMESTAMP - normal software time stamping, ms resolution\n" + " SO_TIMESTAMPNS - more accurate software time stamping\n" + " SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n" + " SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n" + " SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n" + " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n" + " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" + " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" + " SIOCGSTAMP - check last socket time stamp\n" + " SIOCGSTAMPNS - more accurate socket time stamp\n"); + exit(1); +} + +static void bail(const char *error) +{ + printf("%s: %s\n", error, strerror(errno)); + exit(1); +} + +static const unsigned char sync[] = { + 0x00, 0x01, 0x00, 0x01, + 0x5f, 0x44, 0x46, 0x4c, + 0x54, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x01, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x01, 0x00, 0x37, + 0x00, 0x00, 0x00, 0x08, + 0x00, 0x00, 0x00, 0x00, + 0x49, 0x05, 0xcd, 0x01, + 0x29, 0xb1, 0x8d, 0xb0, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x00, 0x00, 0x37, + 0x00, 0x00, 0x00, 0x04, + 0x44, 0x46, 0x4c, 0x54, + 0x00, 0x00, 0xf0, 0x60, + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0xf0, 0x60, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x04, + 0x44, 0x46, 0x4c, 0x54, + 0x00, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len) +{ + struct timeval now; + int res; + + res = sendto(sock, sync, sizeof(sync), 0, + addr, addr_len); + gettimeofday(&now, 0); + if (res < 0) + printf("%s: %s\n", "send", strerror(errno)); + else + printf("%ld.%06ld: sent %d bytes\n", + (long)now.tv_sec, (long)now.tv_usec, + res); +} + +static void printpacket(struct msghdr *msg, int res, + char *data, + int sock, int recvmsg_flags, + int siocgstamp, int siocgstampns) +{ + struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name; + struct cmsghdr *cmsg; + struct timeval tv; + struct timespec ts; + struct timeval now; + + gettimeofday(&now, 0); + + printf("%ld.%06ld: received %s data, %d bytes from %s, %zu bytes control messages\n", + (long)now.tv_sec, (long)now.tv_usec, + (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular", + res, + inet_ntoa(from_addr->sin_addr), + msg->msg_controllen); + for (cmsg = CMSG_FIRSTHDR(msg); + cmsg; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + printf(" cmsg len %zu: ", cmsg->cmsg_len); + switch (cmsg->cmsg_level) { + case SOL_SOCKET: + printf("SOL_SOCKET "); + switch (cmsg->cmsg_type) { + case SO_TIMESTAMP: { + struct timeval *stamp = + (struct timeval *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMP %ld.%06ld", + (long)stamp->tv_sec, + (long)stamp->tv_usec); + break; + } + case SO_TIMESTAMPNS: { + struct timespec *stamp = + (struct timespec *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMPNS %ld.%09ld", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + break; + } + case SO_TIMESTAMPING: { + struct timespec *stamp = + (struct timespec *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMPING "); + printf("SW %ld.%09ld ", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + stamp++; + /* skip deprecated HW transformed */ + stamp++; + printf("HW raw %ld.%09ld", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + break; + } + default: + printf("type %d", cmsg->cmsg_type); + break; + } + break; + case IPPROTO_IP: + printf("IPPROTO_IP "); + switch (cmsg->cmsg_type) { + case IP_RECVERR: { + struct sock_extended_err *err = + (struct sock_extended_err *)CMSG_DATA(cmsg); + printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s", + strerror(err->ee_errno), + err->ee_origin, +#ifdef SO_EE_ORIGIN_TIMESTAMPING + err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ? + "bounced packet" : "unexpected origin" +#else + "probably SO_EE_ORIGIN_TIMESTAMPING" +#endif + ); + if (res < sizeof(sync)) + printf(" => truncated data?!"); + else if (!memcmp(sync, data + res - sizeof(sync), + sizeof(sync))) + printf(" => GOT OUR DATA BACK (HURRAY!)"); + break; + } + case IP_PKTINFO: { + struct in_pktinfo *pktinfo = + (struct in_pktinfo *)CMSG_DATA(cmsg); + printf("IP_PKTINFO interface index %u", + pktinfo->ipi_ifindex); + break; + } + default: + printf("type %d", cmsg->cmsg_type); + break; + } + break; + default: + printf("level %d type %d", + cmsg->cmsg_level, + cmsg->cmsg_type); + break; + } + printf("\n"); + } + + if (siocgstamp) { + if (ioctl(sock, SIOCGSTAMP, &tv)) + printf(" %s: %s\n", "SIOCGSTAMP", strerror(errno)); + else + printf("SIOCGSTAMP %ld.%06ld\n", + (long)tv.tv_sec, + (long)tv.tv_usec); + } + if (siocgstampns) { + if (ioctl(sock, SIOCGSTAMPNS, &ts)) + printf(" %s: %s\n", "SIOCGSTAMPNS", strerror(errno)); + else + printf("SIOCGSTAMPNS %ld.%09ld\n", + (long)ts.tv_sec, + (long)ts.tv_nsec); + } +} + +static void recvpacket(int sock, int recvmsg_flags, + int siocgstamp, int siocgstampns) +{ + char data[256]; + struct msghdr msg; + struct iovec entry; + struct sockaddr_in from_addr; + struct { + struct cmsghdr cm; + char control[512]; + } control; + int res; + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &entry; + msg.msg_iovlen = 1; + entry.iov_base = data; + entry.iov_len = sizeof(data); + msg.msg_name = (caddr_t)&from_addr; + msg.msg_namelen = sizeof(from_addr); + msg.msg_control = &control; + msg.msg_controllen = sizeof(control); + + res = recvmsg(sock, &msg, recvmsg_flags|MSG_DONTWAIT); + if (res < 0) { + printf("%s %s: %s\n", + "recvmsg", + (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular", + strerror(errno)); + } else { + printpacket(&msg, res, data, + sock, recvmsg_flags, + siocgstamp, siocgstampns); + } +} + +int main(int argc, char **argv) +{ + int so_timestamping_flags = 0; + int so_timestamp = 0; + int so_timestampns = 0; + int siocgstamp = 0; + int siocgstampns = 0; + int ip_multicast_loop = 0; + char *interface; + int i; + int enabled = 1; + int sock; + struct ifreq device; + struct ifreq hwtstamp; + struct hwtstamp_config hwconfig, hwconfig_requested; + struct sockaddr_in addr; + struct ip_mreq imr; + struct in_addr iaddr; + int val; + socklen_t len; + struct timeval next; + + if (argc < 2) + usage(0); + interface = argv[1]; + + for (i = 2; i < argc; i++) { + if (!strcasecmp(argv[i], "SO_TIMESTAMP")) + so_timestamp = 1; + else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS")) + so_timestampns = 1; + else if (!strcasecmp(argv[i], "SIOCGSTAMP")) + siocgstamp = 1; + else if (!strcasecmp(argv[i], "SIOCGSTAMPNS")) + siocgstampns = 1; + else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP")) + ip_multicast_loop = 1; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE; + else + usage(argv[i]); + } + + sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (sock < 0) + bail("socket"); + + memset(&device, 0, sizeof(device)); + strncpy(device.ifr_name, interface, sizeof(device.ifr_name)); + if (ioctl(sock, SIOCGIFADDR, &device) < 0) + bail("getting interface IP address"); + + memset(&hwtstamp, 0, sizeof(hwtstamp)); + strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name)); + hwtstamp.ifr_data = (void *)&hwconfig; + memset(&hwconfig, 0, sizeof(hwconfig)); + hwconfig.tx_type = + (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ? + HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + hwconfig.rx_filter = + (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ? + HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE; + hwconfig_requested = hwconfig; + if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) { + if ((errno == EINVAL || errno == ENOTSUP) && + hwconfig_requested.tx_type == HWTSTAMP_TX_OFF && + hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE) + printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n"); + else + bail("SIOCSHWTSTAMP"); + } + printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n", + hwconfig_requested.tx_type, hwconfig.tx_type, + hwconfig_requested.rx_filter, hwconfig.rx_filter); + + /* bind to PTP port */ + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = htons(319 /* PTP event port */); + if (bind(sock, + (struct sockaddr *)&addr, + sizeof(struct sockaddr_in)) < 0) + bail("bind"); + + /* set multicast group for outgoing packets */ + inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */ + addr.sin_addr = iaddr; + imr.imr_multiaddr.s_addr = iaddr.s_addr; + imr.imr_interface.s_addr = + ((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr; + if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF, + &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0) + bail("set multicast"); + + /* join multicast group, loop our own packet */ + if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP, + &imr, sizeof(struct ip_mreq)) < 0) + bail("join multicast group"); + + if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP, + &ip_multicast_loop, sizeof(enabled)) < 0) { + bail("loop multicast"); + } + + /* set socket options for time stamping */ + if (so_timestamp && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, + &enabled, sizeof(enabled)) < 0) + bail("setsockopt SO_TIMESTAMP"); + + if (so_timestampns && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, + &enabled, sizeof(enabled)) < 0) + bail("setsockopt SO_TIMESTAMPNS"); + + if (so_timestamping_flags && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, + &so_timestamping_flags, + sizeof(so_timestamping_flags)) < 0) + bail("setsockopt SO_TIMESTAMPING"); + + /* request IP_PKTINFO for debugging purposes */ + if (setsockopt(sock, SOL_IP, IP_PKTINFO, + &enabled, sizeof(enabled)) < 0) + printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno)); + + /* verify socket options */ + len = sizeof(val); + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0) + printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno)); + else + printf("SO_TIMESTAMP %d\n", val); + + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0) + printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS", + strerror(errno)); + else + printf("SO_TIMESTAMPNS %d\n", val); + + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) { + printf("%s: %s\n", "getsockopt SO_TIMESTAMPING", + strerror(errno)); + } else { + printf("SO_TIMESTAMPING %d\n", val); + if (val != so_timestamping_flags) + printf(" not the expected value %d\n", + so_timestamping_flags); + } + + /* send packets forever every five seconds */ + gettimeofday(&next, 0); + next.tv_sec = (next.tv_sec + 1) / 5 * 5; + next.tv_usec = 0; + while (1) { + struct timeval now; + struct timeval delta; + long delta_us; + int res; + fd_set readfs, errorfs; + + gettimeofday(&now, 0); + delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 + + (long)(next.tv_usec - now.tv_usec); + if (delta_us > 0) { + /* continue waiting for timeout or data */ + delta.tv_sec = delta_us / 1000000; + delta.tv_usec = delta_us % 1000000; + + FD_ZERO(&readfs); + FD_ZERO(&errorfs); + FD_SET(sock, &readfs); + FD_SET(sock, &errorfs); + printf("%ld.%06ld: select %ldus\n", + (long)now.tv_sec, (long)now.tv_usec, + delta_us); + res = select(sock + 1, &readfs, 0, &errorfs, &delta); + gettimeofday(&now, 0); + printf("%ld.%06ld: select returned: %d, %s\n", + (long)now.tv_sec, (long)now.tv_usec, + res, + res < 0 ? strerror(errno) : "success"); + if (res > 0) { + if (FD_ISSET(sock, &readfs)) + printf("ready for reading\n"); + if (FD_ISSET(sock, &errorfs)) + printf("has error\n"); + recvpacket(sock, 0, + siocgstamp, + siocgstampns); + recvpacket(sock, MSG_ERRQUEUE, + siocgstamp, + siocgstampns); + } + } else { + /* write one packet */ + sendpacket(sock, + (struct sockaddr *)&addr, + sizeof(addr)); + next.tv_sec += 5; + continue; + } + } + + return 0; +} diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c new file mode 100644 index 000000000000..5df07047ca86 --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -0,0 +1,549 @@ +/* + * Copyright 2014 Google Inc. + * Author: willemb@google.com (Willem de Bruijn) + * + * Test software tx timestamping, including + * + * - SCHED, SND and ACK timestamps + * - RAW, UDP and TCP + * - IPv4 and IPv6 + * - various packet sizes (to test GSO and TSO) + * + * Consult the command line arguments for help on running + * the various testcases. + * + * This test requires a dummy TCP server. + * A simple `nc6 [-u] -l -p $DESTPORT` will do + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* command line parameters */ +static int cfg_proto = SOCK_STREAM; +static int cfg_ipproto = IPPROTO_TCP; +static int cfg_num_pkts = 4; +static int do_ipv4 = 1; +static int do_ipv6 = 1; +static int cfg_payload_len = 10; +static bool cfg_show_payload; +static bool cfg_do_pktinfo; +static bool cfg_loop_nodata; +static uint16_t dest_port = 9000; + +static struct sockaddr_in daddr; +static struct sockaddr_in6 daddr6; +static struct timespec ts_prev; + +static void __print_timestamp(const char *name, struct timespec *cur, + uint32_t key, int payload_len) +{ + if (!(cur->tv_sec | cur->tv_nsec)) + return; + + fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)", + name, cur->tv_sec, cur->tv_nsec / 1000, + key, payload_len); + + if ((ts_prev.tv_sec | ts_prev.tv_nsec)) { + int64_t cur_ms, prev_ms; + + cur_ms = (long) cur->tv_sec * 1000 * 1000; + cur_ms += cur->tv_nsec / 1000; + + prev_ms = (long) ts_prev.tv_sec * 1000 * 1000; + prev_ms += ts_prev.tv_nsec / 1000; + + fprintf(stderr, " (%+" PRId64 " us)", cur_ms - prev_ms); + } + + ts_prev = *cur; + fprintf(stderr, "\n"); +} + +static void print_timestamp_usr(void) +{ + struct timespec ts; + struct timeval tv; /* avoid dependency on -lrt */ + + gettimeofday(&tv, NULL); + ts.tv_sec = tv.tv_sec; + ts.tv_nsec = tv.tv_usec * 1000; + + __print_timestamp(" USR", &ts, 0, 0); +} + +static void print_timestamp(struct scm_timestamping *tss, int tstype, + int tskey, int payload_len) +{ + const char *tsname; + + switch (tstype) { + case SCM_TSTAMP_SCHED: + tsname = " ENQ"; + break; + case SCM_TSTAMP_SND: + tsname = " SND"; + break; + case SCM_TSTAMP_ACK: + tsname = " ACK"; + break; + default: + error(1, 0, "unknown timestamp type: %u", + tstype); + } + __print_timestamp(tsname, &tss->ts[0], tskey, payload_len); +} + +/* TODO: convert to check_and_print payload once API is stable */ +static void print_payload(char *data, int len) +{ + int i; + + if (!len) + return; + + if (len > 70) + len = 70; + + fprintf(stderr, "payload: "); + for (i = 0; i < len; i++) + fprintf(stderr, "%02hhx ", data[i]); + fprintf(stderr, "\n"); +} + +static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr) +{ + char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN]; + + fprintf(stderr, " pktinfo: ifindex=%u src=%s dst=%s\n", + ifindex, + saddr ? inet_ntop(family, saddr, sa, sizeof(sa)) : "unknown", + daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown"); +} + +static void __poll(int fd) +{ + struct pollfd pollfd; + int ret; + + memset(&pollfd, 0, sizeof(pollfd)); + pollfd.fd = fd; + ret = poll(&pollfd, 1, 100); + if (ret != 1) + error(1, errno, "poll"); +} + +static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) +{ + struct sock_extended_err *serr = NULL; + struct scm_timestamping *tss = NULL; + struct cmsghdr *cm; + int batch = 0; + + for (cm = CMSG_FIRSTHDR(msg); + cm && cm->cmsg_len; + cm = CMSG_NXTHDR(msg, cm)) { + if (cm->cmsg_level == SOL_SOCKET && + cm->cmsg_type == SCM_TIMESTAMPING) { + tss = (void *) CMSG_DATA(cm); + } else if ((cm->cmsg_level == SOL_IP && + cm->cmsg_type == IP_RECVERR) || + (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_RECVERR)) { + serr = (void *) CMSG_DATA(cm); + if (serr->ee_errno != ENOMSG || + serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) { + fprintf(stderr, "unknown ip error %d %d\n", + serr->ee_errno, + serr->ee_origin); + serr = NULL; + } + } else if (cm->cmsg_level == SOL_IP && + cm->cmsg_type == IP_PKTINFO) { + struct in_pktinfo *info = (void *) CMSG_DATA(cm); + print_pktinfo(AF_INET, info->ipi_ifindex, + &info->ipi_spec_dst, &info->ipi_addr); + } else if (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_PKTINFO) { + struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm); + print_pktinfo(AF_INET6, info6->ipi6_ifindex, + NULL, &info6->ipi6_addr); + } else + fprintf(stderr, "unknown cmsg %d,%d\n", + cm->cmsg_level, cm->cmsg_type); + + if (serr && tss) { + print_timestamp(tss, serr->ee_info, serr->ee_data, + payload_len); + serr = NULL; + tss = NULL; + batch++; + } + } + + if (batch > 1) + fprintf(stderr, "batched %d timestamps\n", batch); +} + +static int recv_errmsg(int fd) +{ + static char ctrl[1024 /* overprovision*/]; + static struct msghdr msg; + struct iovec entry; + static char *data; + int ret = 0; + + data = malloc(cfg_payload_len); + if (!data) + error(1, 0, "malloc"); + + memset(&msg, 0, sizeof(msg)); + memset(&entry, 0, sizeof(entry)); + memset(ctrl, 0, sizeof(ctrl)); + + entry.iov_base = data; + entry.iov_len = cfg_payload_len; + msg.msg_iov = &entry; + msg.msg_iovlen = 1; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (ret == -1 && errno != EAGAIN) + error(1, errno, "recvmsg"); + + if (ret >= 0) { + __recv_errmsg_cmsg(&msg, ret); + if (cfg_show_payload) + print_payload(data, cfg_payload_len); + } + + free(data); + return ret == -1; +} + +static void do_test(int family, unsigned int opt) +{ + char *buf; + int fd, i, val = 1, total_len; + + if (family == AF_INET6 && cfg_proto != SOCK_STREAM) { + /* due to lack of checksum generation code */ + fprintf(stderr, "test: skipping datagram over IPv6\n"); + return; + } + + total_len = cfg_payload_len; + if (cfg_proto == SOCK_RAW) { + total_len += sizeof(struct udphdr); + if (cfg_ipproto == IPPROTO_RAW) + total_len += sizeof(struct iphdr); + } + + buf = malloc(total_len); + if (!buf) + error(1, 0, "malloc"); + + fd = socket(family, cfg_proto, cfg_ipproto); + if (fd < 0) + error(1, errno, "socket"); + + if (cfg_proto == SOCK_STREAM) { + if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, + (char*) &val, sizeof(val))) + error(1, 0, "setsockopt no nagle"); + + if (family == PF_INET) { + if (connect(fd, (void *) &daddr, sizeof(daddr))) + error(1, errno, "connect ipv4"); + } else { + if (connect(fd, (void *) &daddr6, sizeof(daddr6))) + error(1, errno, "connect ipv6"); + } + } + + if (cfg_do_pktinfo) { + if (family == AF_INET6) { + if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO, + &val, sizeof(val))) + error(1, errno, "setsockopt pktinfo ipv6"); + } else { + if (setsockopt(fd, SOL_IP, IP_PKTINFO, + &val, sizeof(val))) + error(1, errno, "setsockopt pktinfo ipv4"); + } + } + + opt |= SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_CMSG | + SOF_TIMESTAMPING_OPT_ID; + if (cfg_loop_nodata) + opt |= SOF_TIMESTAMPING_OPT_TSONLY; + + if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, + (char *) &opt, sizeof(opt))) + error(1, 0, "setsockopt timestamping"); + + for (i = 0; i < cfg_num_pkts; i++) { + memset(&ts_prev, 0, sizeof(ts_prev)); + memset(buf, 'a' + i, total_len); + + if (cfg_proto == SOCK_RAW) { + struct udphdr *udph; + int off = 0; + + if (cfg_ipproto == IPPROTO_RAW) { + struct iphdr *iph = (void *) buf; + + memset(iph, 0, sizeof(*iph)); + iph->ihl = 5; + iph->version = 4; + iph->ttl = 2; + iph->daddr = daddr.sin_addr.s_addr; + iph->protocol = IPPROTO_UDP; + /* kernel writes saddr, csum, len */ + + off = sizeof(*iph); + } + + udph = (void *) buf + off; + udph->source = ntohs(9000); /* random spoof */ + udph->dest = ntohs(dest_port); + udph->len = ntohs(sizeof(*udph) + cfg_payload_len); + udph->check = 0; /* not allowed for IPv6 */ + } + + print_timestamp_usr(); + if (cfg_proto != SOCK_STREAM) { + if (family == PF_INET) + val = sendto(fd, buf, total_len, 0, (void *) &daddr, sizeof(daddr)); + else + val = sendto(fd, buf, total_len, 0, (void *) &daddr6, sizeof(daddr6)); + } else { + val = send(fd, buf, cfg_payload_len, 0); + } + if (val != total_len) + error(1, errno, "send"); + + /* wait for all errors to be queued, else ACKs arrive OOO */ + usleep(50 * 1000); + + __poll(fd); + + while (!recv_errmsg(fd)) {} + } + + if (close(fd)) + error(1, errno, "close"); + + free(buf); + usleep(400 * 1000); +} + +static void __attribute__((noreturn)) usage(const char *filepath) +{ + fprintf(stderr, "\nUsage: %s [options] hostname\n" + "\nwhere options are:\n" + " -4: only IPv4\n" + " -6: only IPv6\n" + " -h: show this message\n" + " -I: request PKTINFO\n" + " -l N: send N bytes at a time\n" + " -n: set no-payload option\n" + " -r: use raw\n" + " -R: use raw (IP_HDRINCL)\n" + " -p N: connect to port N\n" + " -u: use udp\n" + " -x: show payload (up to 70 bytes)\n", + filepath); + exit(1); +} + +static void parse_opt(int argc, char **argv) +{ + int proto_count = 0; + char c; + + while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) { + switch (c) { + case '4': + do_ipv6 = 0; + break; + case '6': + do_ipv4 = 0; + break; + case 'I': + cfg_do_pktinfo = true; + break; + case 'n': + cfg_loop_nodata = true; + break; + case 'r': + proto_count++; + cfg_proto = SOCK_RAW; + cfg_ipproto = IPPROTO_UDP; + break; + case 'R': + proto_count++; + cfg_proto = SOCK_RAW; + cfg_ipproto = IPPROTO_RAW; + break; + case 'u': + proto_count++; + cfg_proto = SOCK_DGRAM; + cfg_ipproto = IPPROTO_UDP; + break; + case 'l': + cfg_payload_len = strtoul(optarg, NULL, 10); + break; + case 'p': + dest_port = strtoul(optarg, NULL, 10); + break; + case 'x': + cfg_show_payload = true; + break; + case 'h': + default: + usage(argv[0]); + } + } + + if (!cfg_payload_len) + error(1, 0, "payload may not be nonzero"); + if (cfg_proto != SOCK_STREAM && cfg_payload_len > 1472) + error(1, 0, "udp packet might exceed expected MTU"); + if (!do_ipv4 && !do_ipv6) + error(1, 0, "pass -4 or -6, not both"); + if (proto_count > 1) + error(1, 0, "pass -r, -R or -u, not multiple"); + + if (optind != argc - 1) + error(1, 0, "missing required hostname argument"); +} + +static void resolve_hostname(const char *hostname) +{ + struct addrinfo *addrs, *cur; + int have_ipv4 = 0, have_ipv6 = 0; + + if (getaddrinfo(hostname, NULL, NULL, &addrs)) + error(1, errno, "getaddrinfo"); + + cur = addrs; + while (cur && !have_ipv4 && !have_ipv6) { + if (!have_ipv4 && cur->ai_family == AF_INET) { + memcpy(&daddr, cur->ai_addr, sizeof(daddr)); + daddr.sin_port = htons(dest_port); + have_ipv4 = 1; + } + else if (!have_ipv6 && cur->ai_family == AF_INET6) { + memcpy(&daddr6, cur->ai_addr, sizeof(daddr6)); + daddr6.sin6_port = htons(dest_port); + have_ipv6 = 1; + } + cur = cur->ai_next; + } + if (addrs) + freeaddrinfo(addrs); + + do_ipv4 &= have_ipv4; + do_ipv6 &= have_ipv6; +} + +static void do_main(int family) +{ + fprintf(stderr, "family: %s\n", + family == PF_INET ? "INET" : "INET6"); + + fprintf(stderr, "test SND\n"); + do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE); + + fprintf(stderr, "test ENQ\n"); + do_test(family, SOF_TIMESTAMPING_TX_SCHED); + + fprintf(stderr, "test ENQ + SND\n"); + do_test(family, SOF_TIMESTAMPING_TX_SCHED | + SOF_TIMESTAMPING_TX_SOFTWARE); + + if (cfg_proto == SOCK_STREAM) { + fprintf(stderr, "\ntest ACK\n"); + do_test(family, SOF_TIMESTAMPING_TX_ACK); + + fprintf(stderr, "\ntest SND + ACK\n"); + do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK); + + fprintf(stderr, "\ntest ENQ + SND + ACK\n"); + do_test(family, SOF_TIMESTAMPING_TX_SCHED | + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK); + } +} + +const char *sock_names[] = { NULL, "TCP", "UDP", "RAW" }; + +int main(int argc, char **argv) +{ + if (argc == 1) + usage(argv[0]); + + parse_opt(argc, argv); + resolve_hostname(argv[argc - 1]); + + fprintf(stderr, "protocol: %s\n", sock_names[cfg_proto]); + fprintf(stderr, "payload: %u\n", cfg_payload_len); + fprintf(stderr, "server port: %u\n", dest_port); + fprintf(stderr, "\n"); + + if (do_ipv4) + do_main(PF_INET); + if (do_ipv6) + do_main(PF_INET6); + + return 0; +} diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 4f70d12e392d..eddce59986ee 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -80,7 +80,7 @@ static void async_pf_execute(struct work_struct *work) might_sleep(); - get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL); + get_user_pages_unlocked(NULL, mm, addr, 1, NULL, FOLL_WRITE); kvm_async_page_present_sync(vcpu, apf); spin_lock(&vcpu->async_pf.lock); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b814ae6822b6..e4be695eb789 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1352,10 +1352,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, npages = get_user_page_nowait(current, current->mm, addr, write_fault, page); up_read(¤t->mm->mmap_sem); - } else + } else { + unsigned int flags = FOLL_TOUCH | FOLL_HWPOISON; + + if (write_fault) + flags |= FOLL_WRITE; + npages = __get_user_pages_unlocked(current, current->mm, addr, 1, - write_fault, 0, page, - FOLL_TOUCH|FOLL_HWPOISON); + page, flags); + } if (npages != 1) return npages; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index a83798cc448b..611bf28851c6 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -141,7 +141,7 @@ static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma) struct page *page; pages_pinned = get_user_pages(current, mm, page_start_vaddr, - 1, 0, 0, &page, NULL); + 1, 0, &page, NULL); if (pages_pinned < 1) { seq_puts(m, "]"); return;